From 0fdad7b82a08f9e10803342112679492c91a65a6 Mon Sep 17 00:00:00 2001 From: Somasree Majumder <56045049+soma2000-lang@users.noreply.github.com> Date: Fri, 3 Feb 2023 22:45:34 +0530 Subject: [PATCH 01/27] Removed the redundant call (#1352) --- keras_cv/models/object_detection/retina_net/retina_net.py | 1 - 1 file changed, 1 deletion(-) diff --git a/keras_cv/models/object_detection/retina_net/retina_net.py b/keras_cv/models/object_detection/retina_net/retina_net.py index 671be03eb0..8d3708eb6d 100644 --- a/keras_cv/models/object_detection/retina_net/retina_net.py +++ b/keras_cv/models/object_detection/retina_net/retina_net.py @@ -281,7 +281,6 @@ def compile( "`metrics` due to performance and distribution concerns. Please us the " "`PyCOCOCallback` to evaluate COCO metrics." ) - super().compile(**kwargs) if loss is not None: raise ValueError( "`RetinaNet` does not accept a `loss` to `compile()`. " From e416cec4eaa3f04e25d13f2f0ce27be26b6a27dd Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Fri, 3 Feb 2023 13:12:28 -0500 Subject: [PATCH 02/27] add serialization test to losses (#1345) * add serialization test * Fix isort * Fix lint issues * Fix lint issues * Update test util per ians comments * fix testutils --- keras_cv/layers/serialization_test.py | 71 ++------------------ keras_cv/losses/focal.py | 8 +-- keras_cv/losses/serialization_test.py | 60 +++++++++++++++++ keras_cv/utils/__init__.py | 1 + keras_cv/utils/test_utils.py | 94 +++++++++++++++++++++++++++ 5 files changed, 164 insertions(+), 70 deletions(-) create mode 100644 keras_cv/losses/serialization_test.py create mode 100644 keras_cv/utils/test_utils.py diff --git a/keras_cv/layers/serialization_test.py b/keras_cv/layers/serialization_test.py index a9144904e8..3fbd1c79f8 100644 --- a/keras_cv/layers/serialization_test.py +++ b/keras_cv/layers/serialization_test.py @@ -11,78 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import inspect import tensorflow as tf from absl.testing import parameterized -from keras_cv import core from keras_cv import layers as cv_layers from keras_cv.layers.vit_layers import PatchingAndEmbedding - - -def exhaustive_compare(obj1, obj2): - classes_supporting_get_config = ( - core.FactorSampler, - tf.keras.layers.Layer, - cv_layers.BaseImageAugmentationLayer, - ) - - # If both objects are either one of list or tuple then their individual - # elements also must be checked exhaustively. - if isinstance(obj1, (list, tuple)) and isinstance(obj2, (list, tuple)): - # Length based checks. - if len(obj1) == 0 and len(obj2) == 0: - return True - if len(obj1) != len(obj2): - return False - - # Exhaustive check for all elements. - for v1, v2 in list(zip(obj1, obj2)): - return exhaustive_compare(v1, v2) - - # If the objects are dicts then we simply call the `config_equals` function - # which supports dicts. - elif isinstance(obj1, (dict)) and isinstance(obj2, (dict)): - return config_equals(v1, v2) - - # If both objects are subclasses of Keras classes that support `get_config` - # method, then we compare their individual attributes using `config_equals`. 
- elif isinstance(obj1, classes_supporting_get_config) and isinstance( - obj2, classes_supporting_get_config - ): - return config_equals(obj1.get_config(), obj2.get_config()) - - # Following checks are if either of the objects are _functions_, not methods - # or callables, since Layers and other unforeseen objects may also fit into - # this category. Specifically for Keras activation functions. - elif inspect.isfunction(obj1) and inspect.isfunction(obj2): - return tf.keras.utils.serialize_keras_object( - obj1 - ) == tf.keras.utils.serialize_keras_object(obj2) - elif inspect.isfunction(obj1) and not inspect.isfunction(obj2): - return tf.keras.utils.serialize_keras_object(obj1) == obj2 - elif inspect.isfunction(obj2) and not inspect.isfunction(obj1): - return obj1 == tf.keras.utils.serialize_keras_object(obj2) - - # Lastly check for primitive datatypes and objects that don't need - # additional preprocessing. - else: - return obj1 == obj2 - - -def config_equals(config1, config2): - # Both `config1` and `config2` are python dicts. So the first check is to - # see if both of them have same keys. - if config1.keys() != config2.keys(): - return False - - # Iterate over all keys of the configs and compare each entry exhaustively. - for key in list(config1.keys()): - v1, v2 = config1[key], config2[key] - if not exhaustive_compare(v1, v2): - return False - return True +from keras_cv.utils import test_utils class SerializationTest(tf.test.TestCase, parameterized.TestCase): @@ -458,7 +395,9 @@ def test_layer_serialization(self, layer_cls, init_args): reconstructed_layer = reconstructed_model.layers[0] self.assertTrue( - config_equals(layer.get_config(), reconstructed_layer.get_config()) + test_utils.config_equals( + layer.get_config(), reconstructed_layer.get_config() + ) ) def assertAllInitParametersAreInConfig(self, layer_cls, config): diff --git a/keras_cv/losses/focal.py b/keras_cv/losses/focal.py index 52d0a26e09..3e96709f2b 100644 --- a/keras_cv/losses/focal.py +++ b/keras_cv/losses/focal.py @@ -63,8 +63,8 @@ def __init__( **kwargs, ): super().__init__(**kwargs) - self._alpha = float(alpha) - self._gamma = float(gamma) + self.alpha = float(alpha) + self.gamma = float(gamma) self.from_logits = from_logits self.label_smoothing = label_smoothing @@ -83,9 +83,9 @@ def call(self, y_true, y_pred): cross_entropy = K.binary_crossentropy(y_true, y_pred) - alpha = tf.where(tf.equal(y_true, 1.0), self._alpha, (1.0 - self._alpha)) + alpha = tf.where(tf.equal(y_true, 1.0), self.alpha, (1.0 - self.alpha)) pt = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred) - loss = alpha * tf.pow(1.0 - pt, self._gamma) * cross_entropy + loss = alpha * tf.pow(1.0 - pt, self.gamma) * cross_entropy # In most losses you mean over the final axis to achieve a scalar # Focal loss however is a special case in that it is meant to focus on # a small number of hard examples in a batch. Most of the time this diff --git a/keras_cv/losses/serialization_test.py b/keras_cv/losses/serialization_test.py new file mode 100644 index 0000000000..dcdac018bf --- /dev/null +++ b/keras_cv/losses/serialization_test.py @@ -0,0 +1,60 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import inspect + +import tensorflow as tf +from absl.testing import parameterized + +from keras_cv import losses as cv_losses +from keras_cv.utils import test_utils + + +class SerializationTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.named_parameters( + ( + "FocalLoss", + cv_losses.FocalLoss, + {"alpha": 0.25, "gamma": 2, "from_logits": True}, + ), + ("GIoULoss", cv_losses.GIoULoss, {"bounding_box_format": "xywh"}), + ( + "BinaryPenaltyReducedFocalCrossEntropy", + cv_losses.BinaryPenaltyReducedFocalCrossEntropy, + {}, + ), + ("SimCLRLoss", cv_losses.SimCLRLoss, {"temperature": 0.5}), + ("SmoothL1Loss", cv_losses.SmoothL1Loss, {}), + ) + def test_loss_serialization(self, loss_cls, init_args): + loss = loss_cls(**init_args) + config = loss.get_config() + self.assertAllInitParametersAreInConfig(loss_cls, config) + + reconstructed_loss = loss_cls.from_config(config) + + self.assertTrue( + test_utils.config_equals(loss.get_config(), reconstructed_loss.get_config()) + ) + + def assertAllInitParametersAreInConfig(self, loss_cls, config): + excluded_name = ["args", "kwargs", "*"] + parameter_names = { + v + for v in inspect.signature(loss_cls).parameters.keys() + if v not in excluded_name + } + + intersection_with_config = {v for v in config.keys() if v in parameter_names} + + self.assertSetEqual(parameter_names, intersection_with_config) diff --git a/keras_cv/utils/__init__.py b/keras_cv/utils/__init__.py index 0068763e1c..e4e601fadb 100644 --- a/keras_cv/utils/__init__.py +++ b/keras_cv/utils/__init__.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from keras_cv.utils import test_utils from keras_cv.utils.fill_utils import fill_rectangle from keras_cv.utils.preprocessing import blend from keras_cv.utils.preprocessing import ensure_tensor diff --git a/keras_cv/utils/test_utils.py b/keras_cv/utils/test_utils.py new file mode 100644 index 0000000000..1145d23831 --- /dev/null +++ b/keras_cv/utils/test_utils.py @@ -0,0 +1,94 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect + +import tensorflow as tf + +from keras_cv import core + + +def exhaustive_compare(obj1, obj2): + """Exhaustively compared config of any two python or Keras objects recursively. + + If objects are python objects, a standard equality check is run. If the objects are + Keras objects a `get_config()` call is made. The subsequent configs are then + compared to determine if equality holds. + + Args: + obj1: any object, can be a Keras object or python object. 
+ obj2: any object, can be a Keras object or python object. + """ + + classes_supporting_get_config = ( + core.FactorSampler, + tf.keras.layers.Layer, + tf.keras.losses.Loss, + ) + + # If both objects are either one of list or tuple then their individual + # elements also must be checked exhaustively. + if isinstance(obj1, (list, tuple)) and isinstance(obj2, (list, tuple)): + # Length based checks. + if len(obj1) == 0 and len(obj2) == 0: + return True + if len(obj1) != len(obj2): + return False + + # Exhaustive check for all elements. + for v1, v2 in list(zip(obj1, obj2)): + return exhaustive_compare(v1, v2) + + # If the objects are dicts then we simply call the `config_equals` function + # which supports dicts. + elif isinstance(obj1, (dict)) and isinstance(obj2, (dict)): + return config_equals(v1, v2) + + # If both objects are subclasses of Keras classes that support `get_config` + # method, then we compare their individual attributes using `config_equals`. + elif isinstance(obj1, classes_supporting_get_config) and isinstance( + obj2, classes_supporting_get_config + ): + return config_equals(obj1.get_config(), obj2.get_config()) + + # Following checks are if either of the objects are _functions_, not methods + # or callables, since Layers and other unforeseen objects may also fit into + # this category. Specifically for Keras activation functions. + elif inspect.isfunction(obj1) and inspect.isfunction(obj2): + return tf.keras.utils.serialize_keras_object( + obj1 + ) == tf.keras.utils.serialize_keras_object(obj2) + elif inspect.isfunction(obj1) and not inspect.isfunction(obj2): + return tf.keras.utils.serialize_keras_object(obj1) == obj2 + elif inspect.isfunction(obj2) and not inspect.isfunction(obj1): + return obj1 == tf.keras.utils.serialize_keras_object(obj2) + + # Lastly check for primitive datatypes and objects that don't need + # additional preprocessing. + else: + return obj1 == obj2 + + +def config_equals(config1, config2): + # Both `config1` and `config2` are python dicts. So the first check is to + # see if both of them have same keys. + if config1.keys() != config2.keys(): + return False + + # Iterate over all keys of the configs and compare each entry exhaustively. 
+ for key in list(config1.keys()): + v1, v2 = config1[key], config2[key] + if not exhaustive_compare(v1, v2): + return False + return True From d1d3bc40308a84407bb06bd725bac921fb89bda8 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Fri, 3 Feb 2023 14:00:18 -0500 Subject: [PATCH 03/27] Fixes weight loading for gs:// (#1355) --- keras_cv/models/weights.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/keras_cv/models/weights.py b/keras_cv/models/weights.py index 7ff54389c3..08053e1e1b 100644 --- a/keras_cv/models/weights.py +++ b/keras_cv/models/weights.py @@ -15,7 +15,7 @@ def parse_weights(weights, include_top, model_type): - if not weights or tf.io.gfile.exists(weights): + if not weights: return weights if weights.startswith("gs://"): weights = weights.replace("gs://", "https://storage.googleapis.com/") @@ -23,6 +23,8 @@ def parse_weights(weights, include_top, model_type): origin=weights, cache_subdir="models", ) + if tf.io.gfile.exists(weights): + return weights if weights in ALIASES[model_type]: weights = ALIASES[model_type][weights] if weights in WEIGHTS_CONFIG[model_type]: From 20159434f7894276441829fabf6c874fa053b6e9 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Fri, 3 Feb 2023 18:02:12 -0500 Subject: [PATCH 04/27] remove pad_batch_to_shape (#1346) * remove pad_batch_to_shape * remove pad to batch shape * to_dense * fix lint --- ...an_average_precision_bucket_performance.py | 5 +- .../mean_average_precision_performance.py | 5 +- benchmarks/metrics/coco/recall_performance.py | 5 +- keras_cv/bounding_box/__init__.py | 1 - keras_cv/bounding_box/pad_batch_to_shape.py | 68 ------------------- .../bounding_box/pad_batch_to_shape_test.py | 41 ----------- .../metrics/coco/mean_average_precision.py | 2 +- .../coco/mean_average_precision_test.py | 4 -- keras_cv/metrics/coco/recall.py | 2 +- 9 files changed, 5 insertions(+), 128 deletions(-) delete mode 100644 keras_cv/bounding_box/pad_batch_to_shape.py delete mode 100644 keras_cv/bounding_box/pad_batch_to_shape_test.py diff --git a/benchmarks/metrics/coco/mean_average_precision_bucket_performance.py b/benchmarks/metrics/coco/mean_average_precision_bucket_performance.py index da8bcb365e..b059f7ca58 100644 --- a/benchmarks/metrics/coco/mean_average_precision_bucket_performance.py +++ b/benchmarks/metrics/coco/mean_average_precision_bucket_performance.py @@ -35,10 +35,7 @@ def produce_random_data(include_confidence=False, num_images=128, classes=20): ) ) - images = [ - keras_cv.bounding_box.pad_batch_to_shape(x, [25, images[0].shape[1]]) - for x in images - ] + images = [keras_cv.bounding_box.to_dense(x, max_boxes=25) for x in images] return tf.stack(images, axis=0) diff --git a/benchmarks/metrics/coco/mean_average_precision_performance.py b/benchmarks/metrics/coco/mean_average_precision_performance.py index f19d95cd40..6bf20863b4 100644 --- a/benchmarks/metrics/coco/mean_average_precision_performance.py +++ b/benchmarks/metrics/coco/mean_average_precision_performance.py @@ -35,10 +35,7 @@ def produce_random_data(include_confidence=False, num_images=128, classes=20): ) ) - images = [ - keras_cv.bounding_box.pad_batch_to_shape(x, [25, images[0].shape[1]]) - for x in images - ] + images = [keras_cv.bounding_box.to_dense(x, max_boxes=25) for x in images] return tf.stack(images, axis=0) diff --git a/benchmarks/metrics/coco/recall_performance.py b/benchmarks/metrics/coco/recall_performance.py index 8bfa2791b9..d20aeb879d 100644 --- a/benchmarks/metrics/coco/recall_performance.py +++ 
b/benchmarks/metrics/coco/recall_performance.py @@ -35,10 +35,7 @@ def produce_random_data(include_confidence=False, num_images=128, classes=20): ) ) - images = [ - keras_cv.bounding_box.pad_batch_to_shape(x, [25, images[0].shape[1]]) - for x in images - ] + images = [keras_cv.bounding_box.to_dense(x, max_boxes=25) for x in images] return tf.stack(images, axis=0) diff --git a/keras_cv/bounding_box/__init__.py b/keras_cv/bounding_box/__init__.py index fe8b3b5bb3..92f7169665 100644 --- a/keras_cv/bounding_box/__init__.py +++ b/keras_cv/bounding_box/__init__.py @@ -23,7 +23,6 @@ from keras_cv.bounding_box.formats import YXYX from keras_cv.bounding_box.iou import compute_iou from keras_cv.bounding_box.mask_invalid_detections import mask_invalid_detections -from keras_cv.bounding_box.pad_batch_to_shape import pad_batch_to_shape from keras_cv.bounding_box.to_dense import to_dense from keras_cv.bounding_box.to_ragged import to_ragged from keras_cv.bounding_box.utils import as_relative diff --git a/keras_cv/bounding_box/pad_batch_to_shape.py b/keras_cv/bounding_box/pad_batch_to_shape.py deleted file mode 100644 index 94bcc732e9..0000000000 --- a/keras_cv/bounding_box/pad_batch_to_shape.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright 2022 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import tensorflow as tf - - -def pad_batch_to_shape(bounding_boxes, target_shape, padding_values=-1): - """Pads a list of bounding boxes with -1s. - - Boxes represented by all -1s are ignored by COCO metrics. - - Sample usage: - bounding_box = [[1, 2, 3, 4], [5, 6, 7, 8]] # 2 bounding_boxes with with xywh or - corners format. - target_shape = [3, 4] # Add 1 more dummy bounding_box - result = pad_batch_to_shape(bounding_box, target_shape) - # result == [[1, 2, 3, 4], [5, 6, 7, 8], [-1, -1, -1, -1]] - - target_shape = [2, 5] # Add 1 more index after the current 4 coordinates. - result = pad_batch_to_shape(bounding_box, target_shape) - # result == [[1, 2, 3, 4, -1], [5, 6, 7, 8, -1]] - - Args: - bounding_boxes: tf.Tensor of bounding boxes in any format. - target_shape: Target shape to pad bounding box to. This should have the same - rank as the bounding_boxes. Note that if the target_shape contains any - dimension that is smaller than the bounding box shape, then no value will be - padded. - padding_values: value to pad, defaults to -1 to mask out in coco metrics. - Returns: - bounding_boxes padded to target shape. - - Raises: - ValueError, when target shape has smaller rank or dimension value when - comparing with shape of bounding boxes. - """ - bounding_box_shape = tf.shape(bounding_boxes) - if len(bounding_box_shape) != len(target_shape): - raise ValueError( - "Target shape should have same rank as the bounding box. " - f"Got bounding_box shape = {bounding_box_shape}, " - f"target_shape = {target_shape}" - ) - for dim in range(len(target_shape)): - if bounding_box_shape[dim] > target_shape[dim]: - raise ValueError( - "Target shape should be larger than bounding box shape " - "in all dimensions. 
" - f"Got bounding_box shape = {bounding_box_shape}, " - f"target_shape = {target_shape}" - ) - paddings = [ - [0, target_shape[dim] - bounding_box_shape[dim]] - for dim in range(len(target_shape)) - ] - return tf.pad( - bounding_boxes, paddings, mode="CONSTANT", constant_values=padding_values - ) diff --git a/keras_cv/bounding_box/pad_batch_to_shape_test.py b/keras_cv/bounding_box/pad_batch_to_shape_test.py deleted file mode 100644 index 37e6c12df6..0000000000 --- a/keras_cv/bounding_box/pad_batch_to_shape_test.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2022 The KerasCV Authors -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import tensorflow as tf - -from keras_cv import bounding_box - - -class PadBatchToShapeTest(tf.test.TestCase): - def test_bounding_box_padding(self): - bounding_boxes = [[1, 2, 3, 4], [5, 6, 7, 8]] - target_shape = [3, 4] - result = bounding_box.pad_batch_to_shape(bounding_boxes, target_shape) - self.assertAllClose(result, [[1, 2, 3, 4], [5, 6, 7, 8], [-1, -1, -1, -1]]) - - target_shape = [2, 5] - result = bounding_box.pad_batch_to_shape(bounding_boxes, target_shape) - self.assertAllClose(result, [[1, 2, 3, 4, -1], [5, 6, 7, 8, -1]]) - - # Make sure to raise error if the rank is different between bounding_box and - # target shape - with self.assertRaisesRegex(ValueError, "Target shape should have same rank"): - bounding_box.pad_batch_to_shape(bounding_boxes, [1, 2, 3]) - - # Make sure raise error if the target shape is smaller - target_shape = [3, 2] - with self.assertRaisesRegex( - ValueError, "Target shape should be larger than bounding box shape" - ): - bounding_box.pad_batch_to_shape(bounding_boxes, target_shape) diff --git a/keras_cv/metrics/coco/mean_average_precision.py b/keras_cv/metrics/coco/mean_average_precision.py index 83a6c6f577..6c2a8be251 100644 --- a/keras_cv/metrics/coco/mean_average_precision.py +++ b/keras_cv/metrics/coco/mean_average_precision.py @@ -70,7 +70,7 @@ class _COCOMeanAveragePrecision(tf.keras.metrics.Metric): account for this, you may either pass a `tf.RaggedTensor`, or pad Tensors with `-1`s to indicate unused boxes. A utility function to perform this padding is available at - `keras_cv.bounding_box.pad_batch_to_shape()`. + `keras_cv.bounding_box.to_dense()`. 
```python coco_map = keras_cv.metrics._COCOMeanAveragePrecision( diff --git a/keras_cv/metrics/coco/mean_average_precision_test.py b/keras_cv/metrics/coco/mean_average_precision_test.py index 30c920ca57..aa56260f9d 100644 --- a/keras_cv/metrics/coco/mean_average_precision_test.py +++ b/keras_cv/metrics/coco/mean_average_precision_test.py @@ -17,7 +17,6 @@ import tensorflow as tf from tensorflow import keras -from keras_cv import bounding_box from keras_cv.metrics import _COCOMeanAveragePrecision @@ -210,7 +209,6 @@ def DISABLE_test_counting_with_missing_class_present_in_data(self): [[[0, 50, 100, 150, 1, 1.0], [0, 50, 100, 150, 33, 1.0]]], dtype=tf.float32 ) - y_true = bounding_box.pad_batch_to_shape(y_true, (1, 20, 5)) metric = _COCOMeanAveragePrecision( bounding_box_format="xyxy", iou_thresholds=[0.15], @@ -226,8 +224,6 @@ def DISABLE_test_bounding_box_counting(self): y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float64) y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32) - y_true = bounding_box.pad_batch_to_shape(y_true, (1, 20, 5)) - metric = _COCOMeanAveragePrecision( bounding_box_format="xyxy", iou_thresholds=[0.15], diff --git a/keras_cv/metrics/coco/recall.py b/keras_cv/metrics/coco/recall.py index c18c179187..c97376b8b0 100644 --- a/keras_cv/metrics/coco/recall.py +++ b/keras_cv/metrics/coco/recall.py @@ -61,7 +61,7 @@ class _COCORecall(keras.metrics.Metric): account for this, you may either pass a `tf.RaggedTensor`, or pad Tensors with `-1`s to indicate unused boxes. A utility function to perform this padding is available at - `keras_cv.bounding_box.pad_batch_to_shape`. + `keras_cv.bounding_box.to_dense`. ```python coco_recall = keras_cv.metrics._COCORecall( From ab9fce50c53c4e7f21092d971df6a1665d9026a2 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Fri, 3 Feb 2023 19:28:43 -0500 Subject: [PATCH 05/27] v0.4.2 (#1354) --- keras_cv/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_cv/__init__.py b/keras_cv/__init__.py index 198e15abcf..0a14ebde9b 100644 --- a/keras_cv/__init__.py +++ b/keras_cv/__init__.py @@ -31,4 +31,4 @@ from keras_cv.core import NormalFactorSampler from keras_cv.core import UniformFactorSampler -__version__ = "0.4.1" +__version__ = "0.4.2" From d53dbac75716348ecfc7dec1660804d056668c64 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Mon, 6 Feb 2023 17:13:12 -0500 Subject: [PATCH 06/27] move ops.point_cloud => point_cloud namespace (#1353) * move pointcloud namespace * move pointcloud namespace * move pointcloud namespace * move pointcloud namespace * Fix pointcloud imports * Fix pointcloud import names * Update point cloud * Update point cloud --- .../frustum_random_dropping_points.py | 2 +- .../frustum_random_point_feature_noise.py | 2 +- .../preprocessing_3d/global_random_flip.py | 2 +- .../global_random_rotation.py | 4 +- .../global_random_translation.py | 2 +- .../group_points_by_bounding_boxes.py | 4 +- .../preprocessing_3d/random_copy_paste.py | 2 +- .../preprocessing_3d/random_drop_box.py | 2 +- .../preprocessing_3d/swap_background.py | 2 +- keras_cv/ops/__init__.py | 12 ------ keras_cv/point_cloud/__init__.py | 27 +++++++++++++ keras_cv/{ops => point_cloud}/point_cloud.py | 0 .../{ops => point_cloud}/point_cloud_test.py | 38 +++++++++---------- .../within_box_3d_test.py | 14 +++---- 14 files changed, 63 insertions(+), 50 deletions(-) create mode 100644 keras_cv/point_cloud/__init__.py rename keras_cv/{ops => point_cloud}/point_cloud.py (100%) rename keras_cv/{ops => 
point_cloud}/point_cloud_test.py (91%) rename keras_cv/{ops => point_cloud}/within_box_3d_test.py (93%) diff --git a/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points.py b/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points.py index 3bbb471898..0aca6cb64f 100644 --- a/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points.py +++ b/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points.py @@ -14,8 +14,8 @@ import tensorflow as tf +from keras_cv import point_cloud from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.ops import point_cloud POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git a/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py b/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py index df6c198c1f..8fc2d087f3 100644 --- a/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py +++ b/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py @@ -14,8 +14,8 @@ import tensorflow as tf +from keras_cv import point_cloud from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.ops import point_cloud POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git a/keras_cv/layers/preprocessing_3d/global_random_flip.py b/keras_cv/layers/preprocessing_3d/global_random_flip.py index 8d83d52291..93a86f0ce4 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_flip.py +++ b/keras_cv/layers/preprocessing_3d/global_random_flip.py @@ -16,7 +16,7 @@ from keras_cv.bounding_box_3d import CENTER_XYZ_DXDYDZ_PHI from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.ops.point_cloud import wrap_angle_radians +from keras_cv.point_cloud import wrap_angle_radians POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git a/keras_cv/layers/preprocessing_3d/global_random_rotation.py b/keras_cv/layers/preprocessing_3d/global_random_rotation.py index 64149827c7..5cdd95e1d9 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_rotation.py +++ b/keras_cv/layers/preprocessing_3d/global_random_rotation.py @@ -16,8 +16,8 @@ from keras_cv.bounding_box_3d import CENTER_XYZ_DXDYDZ_PHI from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.ops.point_cloud import coordinate_transform -from keras_cv.ops.point_cloud import wrap_angle_radians +from keras_cv.point_cloud import coordinate_transform +from keras_cv.point_cloud import wrap_angle_radians POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git a/keras_cv/layers/preprocessing_3d/global_random_translation.py b/keras_cv/layers/preprocessing_3d/global_random_translation.py index 04940730c1..1fd6092721 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_translation.py +++ b/keras_cv/layers/preprocessing_3d/global_random_translation.py @@ -16,7 +16,7 @@ from keras_cv.bounding_box_3d import CENTER_XYZ_DXDYDZ_PHI from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.ops.point_cloud import coordinate_transform +from keras_cv.point_cloud import coordinate_transform POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git 
a/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes.py b/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes.py index aee163e2a8..109407d1eb 100644 --- a/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes.py +++ b/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes.py @@ -16,8 +16,8 @@ from keras_cv.bounding_box_3d import CENTER_XYZ_DXDYDZ_PHI from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.ops.point_cloud import group_points_by_boxes -from keras_cv.ops.point_cloud import is_within_box3d +from keras_cv.point_cloud import group_points_by_boxes +from keras_cv.point_cloud import is_within_box3d POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git a/keras_cv/layers/preprocessing_3d/random_copy_paste.py b/keras_cv/layers/preprocessing_3d/random_copy_paste.py index 419cd86862..2f6512e8ca 100644 --- a/keras_cv/layers/preprocessing_3d/random_copy_paste.py +++ b/keras_cv/layers/preprocessing_3d/random_copy_paste.py @@ -17,7 +17,7 @@ from keras_cv.bounding_box_3d import CENTER_XYZ_DXDYDZ_PHI from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d from keras_cv.ops import iou_3d -from keras_cv.ops.point_cloud import is_within_any_box3d +from keras_cv.point_cloud import is_within_any_box3d POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git a/keras_cv/layers/preprocessing_3d/random_drop_box.py b/keras_cv/layers/preprocessing_3d/random_drop_box.py index 09078f10e9..3b1d961e01 100644 --- a/keras_cv/layers/preprocessing_3d/random_drop_box.py +++ b/keras_cv/layers/preprocessing_3d/random_drop_box.py @@ -15,7 +15,7 @@ import tensorflow as tf from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.ops.point_cloud import is_within_any_box3d +from keras_cv.point_cloud import is_within_any_box3d POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git a/keras_cv/layers/preprocessing_3d/swap_background.py b/keras_cv/layers/preprocessing_3d/swap_background.py index 22b17899b4..227242e874 100644 --- a/keras_cv/layers/preprocessing_3d/swap_background.py +++ b/keras_cv/layers/preprocessing_3d/swap_background.py @@ -16,7 +16,7 @@ from keras_cv.bounding_box_3d import CENTER_XYZ_DXDYDZ_PHI from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.ops.point_cloud import is_within_any_box3d +from keras_cv.point_cloud import is_within_any_box3d POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES diff --git a/keras_cv/ops/__init__.py b/keras_cv/ops/__init__.py index 0f761f4cfe..2122dfcf75 100644 --- a/keras_cv/ops/__init__.py +++ b/keras_cv/ops/__init__.py @@ -13,15 +13,3 @@ # limitations under the License. 
from keras_cv.ops.iou_3d import iou_3d -from keras_cv.ops.point_cloud import _box_area -from keras_cv.ops.point_cloud import _center_xyzWHD_to_corner_xyz -from keras_cv.ops.point_cloud import _is_on_lefthand_side -from keras_cv.ops.point_cloud import coordinate_transform -from keras_cv.ops.point_cloud import group_points_by_boxes -from keras_cv.ops.point_cloud import is_within_any_box3d -from keras_cv.ops.point_cloud import is_within_any_box3d_v2 -from keras_cv.ops.point_cloud import is_within_box2d -from keras_cv.ops.point_cloud import is_within_box3d -from keras_cv.ops.point_cloud import spherical_coordinate_transform -from keras_cv.ops.point_cloud import within_a_frustum -from keras_cv.ops.point_cloud import within_box3d_index diff --git a/keras_cv/point_cloud/__init__.py b/keras_cv/point_cloud/__init__.py new file mode 100644 index 0000000000..6e8299ef87 --- /dev/null +++ b/keras_cv/point_cloud/__init__.py @@ -0,0 +1,27 @@ +# Copyright 2023 The KerasCV Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.point_cloud.point_cloud import _box_area +from keras_cv.point_cloud.point_cloud import _center_xyzWHD_to_corner_xyz +from keras_cv.point_cloud.point_cloud import _is_on_lefthand_side +from keras_cv.point_cloud.point_cloud import coordinate_transform +from keras_cv.point_cloud.point_cloud import group_points_by_boxes +from keras_cv.point_cloud.point_cloud import is_within_any_box3d +from keras_cv.point_cloud.point_cloud import is_within_any_box3d_v2 +from keras_cv.point_cloud.point_cloud import is_within_box2d +from keras_cv.point_cloud.point_cloud import is_within_box3d +from keras_cv.point_cloud.point_cloud import spherical_coordinate_transform +from keras_cv.point_cloud.point_cloud import within_a_frustum +from keras_cv.point_cloud.point_cloud import within_box3d_index +from keras_cv.point_cloud.point_cloud import wrap_angle_radians diff --git a/keras_cv/ops/point_cloud.py b/keras_cv/point_cloud/point_cloud.py similarity index 100% rename from keras_cv/ops/point_cloud.py rename to keras_cv/point_cloud/point_cloud.py diff --git a/keras_cv/ops/point_cloud_test.py b/keras_cv/point_cloud/point_cloud_test.py similarity index 91% rename from keras_cv/ops/point_cloud_test.py rename to keras_cv/point_cloud/point_cloud_test.py index 8d12a709ce..f1bc7a7d3a 100644 --- a/keras_cv/ops/point_cloud_test.py +++ b/keras_cv/point_cloud/point_cloud_test.py @@ -19,15 +19,13 @@ import tensorflow as tf from absl.testing import parameterized -from keras_cv import ops +from keras_cv import point_cloud class AngleTest(tf.test.TestCase): def test_wrap_angle_radians(self): - self.assertAllClose( - -np.pi + 0.1, ops.point_cloud.wrap_angle_radians(np.pi + 0.1) - ) - self.assertAllClose(0.0, ops.point_cloud.wrap_angle_radians(2 * np.pi)) + self.assertAllClose(-np.pi + 0.1, point_cloud.wrap_angle_radians(np.pi + 0.1)) + self.assertAllClose(0.0, point_cloud.wrap_angle_radians(2 * np.pi)) class Boxes3DTestCase(tf.test.TestCase, parameterized.TestCase): @@ -38,7 +36,7 @@ def 
test_convert_center_to_corners(self): [[1, 2, 3, 4, 3, 6, np.pi / 2.0], [1, 2, 3, 4, 3, 6, np.pi / 2.0]], ] ) - corners = ops._center_xyzWHD_to_corner_xyz(boxes) + corners = point_cloud._center_xyzWHD_to_corner_xyz(boxes) self.assertEqual((2, 2, 8, 3), corners.shape) for i in [0, 1]: self.assertAllClose(-1, np.min(corners[0, i, :, 0])) @@ -75,7 +73,7 @@ def test_within_box2d(self): ], dtype=tf.float32, ) - is_inside = ops.is_within_box2d(points, boxes) + is_inside = point_cloud.is_within_box2d(points, boxes) expected = [[False]] * 8 + [[True]] * 2 self.assertAllEqual(expected, is_inside) @@ -98,7 +96,7 @@ def test_within_zero_box2d(self): ], dtype=tf.float32, ) - is_inside = ops.is_within_box2d(points, bbox) + is_inside = point_cloud.is_within_box2d(points, bbox) expected = [[False]] * 10 self.assertAllEqual(expected, is_inside) @@ -106,11 +104,11 @@ def test_is_on_lefthand_side(self): v1 = tf.constant([[0.0, 0.0]], dtype=tf.float32) v2 = tf.constant([[1.0, 0.0]], dtype=tf.float32) p = tf.constant([[0.5, 0.5], [-1.0, -3], [-1.0, 1.0]], dtype=tf.float32) - res = ops._is_on_lefthand_side(p, v1, v2) + res = point_cloud._is_on_lefthand_side(p, v1, v2) self.assertAllEqual([[True, False, True]], res) - res = ops._is_on_lefthand_side(v1, v1, v2) + res = point_cloud._is_on_lefthand_side(v1, v1, v2) self.assertAllEqual([[True]], res) - res = ops._is_on_lefthand_side(v2, v1, v2) + res = point_cloud._is_on_lefthand_side(v2, v1, v2) self.assertAllEqual([[True]], res) @parameterized.named_parameters( @@ -138,7 +136,7 @@ def _rotate(bbox, theta): return tf.matmul(bbox, rotation_matrix) rotated_bboxes = _rotate(boxes, angle) - res = ops._box_area(rotated_bboxes) + res = point_cloud._box_area(rotated_bboxes) self.assertAllClose(expected, res) def test_within_box3d(self): @@ -205,7 +203,7 @@ def test_within_box3d(self): assert bboxes.shape[0] == num_boxes assert expected_is_inside.shape[0] == num_points assert expected_is_inside.shape[1] == num_boxes - is_inside = ops.is_within_box3d(points, bboxes) + is_inside = point_cloud.is_within_box3d(points, bboxes) self.assertAllEqual([num_points, num_boxes], is_inside.shape) self.assertAllEqual(expected_is_inside, is_inside) # Add a batch dimension to the data and see that it still works @@ -213,7 +211,7 @@ def test_within_box3d(self): batch_size = 3 points = tf.tile(points[tf.newaxis, ...], [batch_size, 1, 1]) bboxes = tf.tile(bboxes[tf.newaxis, ...], [batch_size, 1, 1]) - is_inside = ops.is_within_box3d(points, bboxes) + is_inside = point_cloud.is_within_box3d(points, bboxes) self.assertAllEqual([batch_size, num_points, num_boxes], is_inside.shape) for batch_idx in range(batch_size): self.assertAllEqual(expected_is_inside, is_inside[batch_idx]) @@ -244,7 +242,7 @@ def testCoordinateTransform(self): dtype=tf.float32, ) - result = ops.coordinate_transform(replicated_points, pose) + result = point_cloud.coordinate_transform(replicated_points, pose) # We expect the point to be translated close to the car, and then rotated # mostly around the x-axis. @@ -256,7 +254,7 @@ def testCoordinateTransform(self): def testSphericalCoordinatesTransform(self): np_xyz = np.random.randn(5, 6, 3) points = tf.constant(np_xyz, dtype=tf.float32) - spherical_coordinates = ops.spherical_coordinate_transform(points) + spherical_coordinates = point_cloud.spherical_coordinate_transform(points) # Convert coordinates back to xyz to verify. 
dist = spherical_coordinates[..., 0] @@ -312,7 +310,7 @@ def test_group_points(self): ], dtype=tf.float32, ) - res = ops.group_points_by_boxes(points, bboxes) + res = point_cloud.group_points_by_boxes(points, bboxes) expected_result = tf.ragged.constant( [[0, 1, 2], [5, 6, 7, 16], [10, 11, 12], [17]] ) @@ -322,19 +320,19 @@ def testWithinAFrustum(self): center = tf.constant([1.0, 1.0, 1.0]) points = tf.constant([[0.0, 0.0, 0.0], [1.0, 2.0, 1.0], [1.0, 0.0, 1.0]]) - point_mask = ops.within_a_frustum( + point_mask = point_cloud.within_a_frustum( points, center, r_distance=1.0, theta_width=1.0, phi_width=1.0 ) target_point_mask = tf.constant([False, True, False]) self.assertAllClose(point_mask, target_point_mask) - point_mask = ops.within_a_frustum( + point_mask = point_cloud.within_a_frustum( points, center, r_distance=1.0, theta_width=3.14, phi_width=3.14 ) target_point_mask = tf.constant([False, True, True]) self.assertAllClose(point_mask, target_point_mask) - point_mask = ops.within_a_frustum( + point_mask = point_cloud.within_a_frustum( points, center, r_distance=3.0, theta_width=1.0, phi_width=1.0 ) target_point_mask = tf.constant([False, False, False]) diff --git a/keras_cv/ops/within_box_3d_test.py b/keras_cv/point_cloud/within_box_3d_test.py similarity index 93% rename from keras_cv/ops/within_box_3d_test.py rename to keras_cv/point_cloud/within_box_3d_test.py index f7e6127d5c..63f89254f6 100644 --- a/keras_cv/ops/within_box_3d_test.py +++ b/keras_cv/point_cloud/within_box_3d_test.py @@ -81,7 +81,7 @@ def test_unbatched_unrotated(self): [5.6, 5.5, 5.5], ] ).astype("float32") - res = keras_cv.ops.within_box3d_index(points, boxes) + res = keras_cv.point_cloud.within_box3d_index(points, boxes) self.assertAllEqual([0, 0, -1, 0, -1, 1, -1], res) @pytest.mark.skipif( @@ -107,7 +107,7 @@ def test_unbatched_rotated(self): [2.83, 0, 0], ] ).astype("float32") - res = keras_cv.ops.within_box3d_index(points, boxes) + res = keras_cv.point_cloud.within_box3d_index(points, boxes) self.assertAllClose([0, 0, -1, 0, -1], res) @pytest.mark.skipif( @@ -138,7 +138,7 @@ def test_batched_unrotated(self): ] * 2 ).astype("float32") - res = keras_cv.ops.within_box3d_index(points, boxes) + res = keras_cv.point_cloud.within_box3d_index(points, boxes) self.assertAllEqual( [[0, 0, -1, 0, -1, -1, -1], [-1, -1, -1, -1, -1, 0, -1]], res ) @@ -170,7 +170,7 @@ def test_batched_rotated(self): ] * 2 ).astype("float32") - res = keras_cv.ops.within_box3d_index(points, boxes) + res = keras_cv.point_cloud.within_box3d_index(points, boxes) self.assertAllEqual([[0, 0, -1, 0, -1], [-1, -1, -1, -1, -1]], res) @pytest.mark.skipif( @@ -181,7 +181,7 @@ def test_many_points(self): points, boxes = get_points_boxes() for _ in range(5): - res = keras_cv.ops.within_box3d_index(points, boxes) + res = keras_cv.point_cloud.within_box3d_index(points, boxes) self.assertAllClose(res.shape, points.shape[:1]) @pytest.mark.skipif( @@ -195,6 +195,6 @@ def test_equal(self): box_dim = tf.random.uniform(shape=[1, 3], minval=0.1, maxval=10.0) boxes = tf.concat([box_center, box_dim, [[0.0]]], axis=-1) points = tf.random.normal([32, 3]) - res = keras_cv.ops.is_within_any_box3d(points, boxes) - res_v2 = keras_cv.ops.is_within_any_box3d_v2(points, boxes) + res = keras_cv.point_cloud.is_within_any_box3d(points, boxes) + res_v2 = keras_cv.point_cloud.is_within_any_box3d_v2(points, boxes) self.assertAllEqual(res, res_v2) From 4fb2bbc1f114a5374a598611d10482f9bc94f6a3 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> 
Date: Mon, 6 Feb 2023 18:21:51 -0500 Subject: [PATCH 07/27] Update docstring for base KPL (#1363) * Update docstring for base KPL * typos * Adjust wording slightly --- .../base_image_augmentation_layer.py | 26 +++++++++++++------ 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py index 1df7bca324..0099b50de9 100644 --- a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py +++ b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py @@ -57,14 +57,24 @@ class BaseImageAugmentationLayer(tf.keras.__internal__.layers.BaseRandomLayer): coodinate the randomness behavior, eg, in the RandomFlip layer, the image and bounding_boxes should be changed in the same way. - The `call()` method support two formats of inputs: - 1. Single image tensor with 3D (HWC) or 4D (NHWC) format. - 2. A dict of tensors with stable keys. The supported keys are: - `"images"`, `"labels"` and `"bounding_boxes"` at the moment. We might add - more keys in future when we support more types of augmentation. - - The output of the `call()` will be in two formats, which will be the same - structure as the inputs. + The `call()` method supports two formats of inputs: + 1. A single image tensor with shape (height, width, channels) or + (batch_size, height, width, channels) + 1. A dict of tensors with any of the following keys (note that `"images"` + must be present): + * `"images"` - Image Tensor with shape (height, width, channels) or + (batch_size, height, width, channels) + * `"labels"` - One-hot encoded classification labels Tensor with shape + (num_classes) or (batch_size, num_classes) + * `"bounding_boxes"` - A dictionary with keys: + * `"boxes"` - Tensor with shape (num_boxes, 4) or (batch_size, + num_boxes, 4) + * `"classes"` - Tensor of class labels for boxes with shape (num_boxes, + num_classes) or (batch_size, num_boxes, num_classes). + Any other keys included in this dictionary will be ignored and unmodified + by an augmentation layer. + + The output of the `call()` will be the same structure as the inputs. 
The `call()` will handle the logic detecting the training/inference mode, unpack the inputs, forward to the correct function, and pack the output back From cbf7f8f2adf496640704144e87ed69e735bc9a6a Mon Sep 17 00:00:00 2001 From: Jonathan Bischof Date: Tue, 7 Feb 2023 01:15:59 +0000 Subject: [PATCH 08/27] Add `tensorflow` to `test` requirements (#1365) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index ed3200f7e5..120dae710f 100644 --- a/setup.py +++ b/setup.py @@ -58,6 +58,7 @@ def is_pure(self): "black[jupyter]", "pytest", "pycocotools", + "tensorflow", ], "examples": ["tensorflow_datasets", "matplotlib"], }, From b6fbf5d2cb78dfaa33089417969d4933bc32fb32 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Tue, 7 Feb 2023 23:04:00 -0500 Subject: [PATCH 09/27] Disable mixed-precision at end of deeplab MP test (#1371) --- keras_cv/models/segmentation/deeplab_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras_cv/models/segmentation/deeplab_test.py b/keras_cv/models/segmentation/deeplab_test.py index 895d5b4909..daba76240c 100644 --- a/keras_cv/models/segmentation/deeplab_test.py +++ b/keras_cv/models/segmentation/deeplab_test.py @@ -77,6 +77,7 @@ def test_mixed_precision(self): output = model(input_image, training=True) self.assertEquals(output["output"].dtype, tf.float32) + tf.keras.mixed_precision.set_global_policy("float32") def test_invalid_backbone_model(self): with self.assertRaisesRegex( From 6c0accb105879696fc21c31ab7b1734e4cdcd04d Mon Sep 17 00:00:00 2001 From: IMVision <88665786+IMvision12@users.noreply.github.com> Date: Thu, 9 Feb 2023 03:37:15 +0530 Subject: [PATCH 10/27] Fix ConvMixer (#1336) * fix * format * Update convmixer.py * Update convmixer.py * Update convmixer.py * Update convmixer.py * Doc Fix * patch * fix doc --- keras_cv/models/convmixer.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/keras_cv/models/convmixer.py b/keras_cv/models/convmixer.py index a1adddb18c..94f0d8b202 100644 --- a/keras_cv/models/convmixer.py +++ b/keras_cv/models/convmixer.py @@ -88,11 +88,10 @@ """ -def CovnMixer_Layer(dim, kernel_size): - """CovnMixer Layer module. +def ConvMixerLayer(dim, kernel_size): + """ConvMixerLayer module. Args: - inputs: Input tensor. - dim: integer, filters of the layer in a block. + dim: integer, Number of filters for convolution layers. kernel_size: integer, kernel size of the Conv2d layers. Returns: Output tensor for the CovnMixer Layer. @@ -113,10 +112,10 @@ def apply(x): return apply -def patch_embed(dim, patch_size): +def PatchEmbed(dim, patch_size): """Implementation for Extracting Patch Embeddings. Args: - inputs: Input tensor. + dim: integer, Number of filters for convolution layers. patch_size: integer, Size of patches. Returns: Output tensor for the patch embed. @@ -148,7 +147,7 @@ def ConvMixer( ): """Instantiates the ConvMixer architecture. Args: - dim: number of filters. + dim: Number of filters for convolution layers. depth: number of CovnMixer Layer. patch_size: Size of the patches. kernel_size: kernel size for conv2d layers. 
@@ -206,10 +205,10 @@ def ConvMixer( if include_rescaling: x = layers.Rescaling(1 / 255.0)(x) - x = patch_embed(dim, patch_size)(x) + x = PatchEmbed(dim, patch_size)(x) for _ in range(depth): - x = CovnMixer_Layer(dim, kernel_size)(x) + x = ConvMixerLayer(dim, kernel_size)(x) if include_top: x = layers.GlobalAveragePooling2D(name="avg_pool")(x) From f289d9041fcf67fec9a38f7f599f9295fbcda7a3 Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Wed, 8 Feb 2023 20:56:47 -0500 Subject: [PATCH 11/27] Update base_image_augmentation_layer.py (#1375) --- keras_cv/layers/preprocessing/base_image_augmentation_layer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py index 0099b50de9..fe6656250c 100644 --- a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py +++ b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py @@ -107,7 +107,7 @@ def augment_image(self, image, transformation): return (inputs - mean) * random_factor + mean ``` - Note that since the randomness is also a common functionnality, this layer + Note that since the randomness is also a common functionality, this layer also includes a tf.keras.backend.RandomGenerator, which can be used to produce the random numbers. The random number generator is stored in the `self._random_generator` attribute. From 08ca8d6697e1a16751792e72230b42ed17ee9175 Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Wed, 8 Feb 2023 21:08:28 -0500 Subject: [PATCH 12/27] Add VectorizedBaseImageAugmentation layer (#1373) * implement vectorized base image augmentation layer * Implement vectorized RandomContrast layer * KPL performance * Random contrast vectorized * Vectorized contrast * Fix vectorized base layer * Fix vectorized base layer * Add vectorized grayscale layer * Remove random contrast * Remove random contrast * test_preserves_ragged_status_Grayscale * test_preserves_ragged_status_Grayscale * Fix * Fix masks * Fix masks * rename to 'batched' * Fix docstrings * Fix docstrings * Remove ragged method * Begin ragged image support * Begin ragged image support * Begin ragged image support * Begin ragged image support * Begin ragged image support * Performance benchmark * Reformat * Vectorized grayscale * Fix ragged test case * Fix ragged test case * Fix ragged test case * Fix ragged test case * Fix ragged test case --- benchmarks/vectorized_grayscale.py | 178 ++++++++ keras_cv/layers/preprocessing/grayscale.py | 45 +- .../layers/preprocessing/grayscale_test.py | 24 +- .../layers/preprocessing/ragged_image_test.py | 8 +- ...ectorized_base_image_augmentation_layer.py | 425 ++++++++++++++++++ ...ized_base_image_augmentation_layer_test.py | 263 +++++++++++ 6 files changed, 903 insertions(+), 40 deletions(-) create mode 100644 benchmarks/vectorized_grayscale.py create mode 100644 keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py create mode 100644 keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py diff --git a/benchmarks/vectorized_grayscale.py b/benchmarks/vectorized_grayscale.py new file mode 100644 index 0000000000..3d3b8d3977 --- /dev/null +++ b/benchmarks/vectorized_grayscale.py @@ -0,0 +1,178 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import matplotlib.pyplot as plt +import tensorflow as tf +import tensorflow.keras as keras + +from keras_cv.layers import Grayscale +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) + + +class OldGrayscale(BaseImageAugmentationLayer): + """Grayscale is a preprocessing layer that transforms RGB images to Grayscale images. + Input images should have values in the range of [0, 255]. + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format + Args: + output_channels. + Number color channels present in the output image. + The output_channels can be 1 or 3. RGB image with shape + (..., height, width, 3) will have the following shapes + after the `Grayscale` operation: + a. (..., height, width, 1) if output_channels = 1 + b. (..., height, width, 3) if output_channels = 3. + Usage: + ```python + (images, labels), _ = tf.keras.datasets.cifar10.load_data() + to_grayscale = keras_cv.layers.preprocessing.Grayscale() + augmented_images = to_grayscale(images) + ``` + """ + + def __init__(self, output_channels=1, **kwargs): + super().__init__(**kwargs) + self.output_channels = output_channels + # This layer may raise an error when running on GPU using auto_vectorize + self.auto_vectorize = False + + def compute_image_signature(self, images): + # required because of the `output_channels` argument + if isinstance(images, tf.RaggedTensor): + ragged_spec = tf.RaggedTensorSpec( + shape=images.shape[1:3] + [self.output_channels], + ragged_rank=1, + dtype=self.compute_dtype, + ) + return ragged_spec + return tf.TensorSpec( + images.shape[1:3] + [self.output_channels], self.compute_dtype + ) + + def _check_input_params(self, output_channels): + if output_channels not in [1, 3]: + raise ValueError( + "Received invalid argument output_channels. " + f"output_channels must be in 1 or 3. 
Got {output_channels}" + ) + self.output_channels = output_channels + + def augment_image(self, image, transformation=None, **kwargs): + grayscale = tf.image.rgb_to_grayscale(image) + if self.output_channels == 1: + return grayscale + elif self.output_channels == 3: + return tf.image.grayscale_to_rgb(grayscale) + else: + raise ValueError("Unsupported value for `output_channels`.") + + def augment_bounding_boxes(self, bounding_boxes, **kwargs): + return bounding_boxes + + def augment_label(self, label, transformation=None, **kwargs): + return label + + def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + return segmentation_mask + + def get_config(self): + config = { + "output_channels": self.output_channels, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +(x_train, _), _ = keras.datasets.cifar10.load_data() +x_train = x_train.astype(float) + +x_train.shape + + +images = [] + +num_images = [1000, 2000, 5000, 10000] + +results = {} + +for aug in [Grayscale, OldGrayscale]: + c = aug.__name__ + + layer = aug() + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + + results[c] = runtimes + + c = aug.__name__ + " Graph Mode" + + layer = aug() + + @tf.function() + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + + results[c] = runtimes + +plt.figure() +for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + +plt.ylabel("Runtime (seconds)") +plt.legend() +plt.show() + +# So we can actually see more relevant margins +del results["OldGrayscale"] + +plt.figure() +for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + +plt.ylabel("Runtime (seconds)") +plt.legend() +plt.show() diff --git a/keras_cv/layers/preprocessing/grayscale.py b/keras_cv/layers/preprocessing/grayscale.py index bc25292c63..7853d84dd0 100644 --- a/keras_cv/layers/preprocessing/grayscale.py +++ b/keras_cv/layers/preprocessing/grayscale.py @@ -14,13 +14,13 @@ import tensorflow as tf -from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( - BaseImageAugmentationLayer, +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, ) @tf.keras.utils.register_keras_serializable(package="keras_cv") -class Grayscale(BaseImageAugmentationLayer): +class Grayscale(VectorizedBaseImageAugmentationLayer): """Grayscale is a preprocessing layer that transforms RGB images to Grayscale images. Input images should have values in the range of [0, 255]. 
@@ -50,21 +50,7 @@ class Grayscale(BaseImageAugmentationLayer): def __init__(self, output_channels=1, **kwargs): super().__init__(**kwargs) self.output_channels = output_channels - # This layer may raise an error when running on GPU using auto_vectorize - self.auto_vectorize = False - - def compute_image_signature(self, images): - # required because of the `output_channels` argument - if isinstance(images, tf.RaggedTensor): - ragged_spec = tf.RaggedTensorSpec( - shape=images.shape[1:3] + [self.output_channels], - ragged_rank=1, - dtype=self.compute_dtype, - ) - return ragged_spec - return tf.TensorSpec( - images.shape[1:3] + [self.output_channels], self.compute_dtype - ) + self._check_input_params(output_channels) def _check_input_params(self, output_channels): if output_channels not in [1, 3]: @@ -74,8 +60,19 @@ def _check_input_params(self, output_channels): ) self.output_channels = output_channels - def augment_image(self, image, transformation=None, **kwargs): - grayscale = tf.image.rgb_to_grayscale(image) + def compute_ragged_image_signature(self, images): + ragged_spec = tf.RaggedTensorSpec( + shape=images.shape[1:3] + (self.output_channels,), + ragged_rank=1, + dtype=self.compute_dtype, + ) + return ragged_spec + + def augment_ragged_image(self, image, transformation, **kwargs): + return self.augment_images(image, transformations=transformation, **kwargs) + + def augment_images(self, images, transformations=None, **kwargs): + grayscale = tf.image.rgb_to_grayscale(images) if self.output_channels == 1: return grayscale elif self.output_channels == 3: @@ -86,11 +83,11 @@ def augment_image(self, image, transformation=None, **kwargs): def augment_bounding_boxes(self, bounding_boxes, **kwargs): return bounding_boxes - def augment_label(self, label, transformation=None, **kwargs): - return label + def augment_labels(self, labels, transformations=None, **kwargs): + return labels - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): - return segmentation_mask + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + return segmentation_masks def get_config(self): config = { diff --git a/keras_cv/layers/preprocessing/grayscale_test.py b/keras_cv/layers/preprocessing/grayscale_test.py index c7ccf335f7..eba6b02271 100644 --- a/keras_cv/layers/preprocessing/grayscale_test.py +++ b/keras_cv/layers/preprocessing/grayscale_test.py @@ -18,7 +18,7 @@ class GrayscaleTest(tf.test.TestCase): def test_return_shapes(self): - xs = tf.ones((2, 512, 512, 3)) + xs = tf.ones((2, 52, 24, 3)) layer = preprocessing.Grayscale( output_channels=1, @@ -30,12 +30,12 @@ def test_return_shapes(self): ) xs2 = layer(xs, training=True) - self.assertEqual(xs1.shape, [2, 512, 512, 1]) - self.assertEqual(xs2.shape, [2, 512, 512, 3]) + self.assertEqual(xs1.shape, [2, 52, 24, 1]) + self.assertEqual(xs2.shape, [2, 52, 24, 3]) def test_in_tf_function(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 3)), tf.ones((100, 100, 3))], axis=0), + tf.stack([2 * tf.ones((10, 10, 3)), tf.ones((10, 10, 3))], axis=0), tf.float32, ) @@ -61,12 +61,12 @@ def augment(x): xs2 = augment(xs) - self.assertEqual(xs1.shape, [2, 100, 100, 1]) - self.assertEqual(xs2.shape, [2, 100, 100, 3]) + self.assertEqual(xs1.shape, [2, 10, 10, 1]) + self.assertEqual(xs2.shape, [2, 10, 10, 3]) def test_non_square_image(self): xs = tf.cast( - tf.stack([2 * tf.ones((512, 1024, 3)), tf.ones((512, 1024, 3))], axis=0), + tf.stack([2 * tf.ones((52, 24, 3)), tf.ones((52, 24, 3))], axis=0), tf.float32, ) @@ 
-80,12 +80,12 @@ def test_non_square_image(self): ) xs2 = layer(xs, training=True) - self.assertEqual(xs1.shape, [2, 512, 1024, 1]) - self.assertEqual(xs2.shape, [2, 512, 1024, 3]) + self.assertEqual(xs1.shape, [2, 52, 24, 1]) + self.assertEqual(xs2.shape, [2, 52, 24, 3]) def test_in_single_image(self): xs = tf.cast( - tf.ones((512, 512, 3)), + tf.ones((52, 24, 3)), dtype=tf.float32, ) @@ -99,5 +99,5 @@ def test_in_single_image(self): ) xs2 = layer(xs, training=True) - self.assertEqual(xs1.shape, [512, 512, 1]) - self.assertEqual(xs2.shape, [512, 512, 3]) + self.assertEqual(xs1.shape, [52, 24, 1]) + self.assertEqual(xs2.shape, [52, 24, 3]) diff --git a/keras_cv/layers/preprocessing/ragged_image_test.py b/keras_cv/layers/preprocessing/ragged_image_test.py index a6212131da..551d3d83da 100644 --- a/keras_cv/layers/preprocessing/ragged_image_test.py +++ b/keras_cv/layers/preprocessing/ragged_image_test.py @@ -126,8 +126,8 @@ def test_preserves_ragged_status(self, layer_cls, init_args): layer = layer_cls(**init_args) inputs = tf.ragged.stack( [ - tf.ones((512, 512, 3)), - tf.ones((600, 300, 3)), + tf.ones((5, 5, 3)), + tf.ones((8, 8, 3)), ] ) outputs = layer(inputs) @@ -138,8 +138,8 @@ def test_converts_ragged_to_dense(self, layer_cls, init_args): layer = layer_cls(**init_args) inputs = tf.ragged.stack( [ - tf.ones((512, 512, 3)), - tf.ones((600, 300, 3)), + tf.ones((5, 5, 3)), + tf.ones((8, 8, 3)), ] ) outputs = layer(inputs) diff --git a/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py new file mode 100644 index 0000000000..5754b280dc --- /dev/null +++ b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py @@ -0,0 +1,425 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf + +from keras_cv import bounding_box +from keras_cv.utils import preprocessing + +H_AXIS = -3 +W_AXIS = -2 + +IMAGES = "images" +LABELS = "labels" +TARGETS = "targets" +BOUNDING_BOXES = "bounding_boxes" +KEYPOINTS = "keypoints" +SEGMENTATION_MASKS = "segmentation_masks" + +IS_DICT = "is_dict" +BATCHED = "batched" +USE_TARGETS = "use_targets" + + +class VectorizedBaseImageAugmentationLayer( + tf.keras.__internal__.layers.BaseRandomLayer +): + """Abstract base layer for vectorized image augmentaion. + + This layer contains base functionalities for preprocessing layers which + augment image related data, eg. image and in future, label and bounding + boxes. The subclasses could avoid making certain mistakes and reduce code + duplications. + + This layer requires you to implement one method: `augment_images()`, which + augments one single image during the training. There are a few additional + methods that you can implement for added functionality on the layer: + + `augment_labels()`, which handles label augmentation if the layer supports + that. 
+ + `augment_bounding_boxes()`, which handles the bounding box augmentation, if + the layer supports that. + + `get_random_transformations()`, which should produce a batch of random + transformation settings. The tranformation object, which must be a batched Tensor or + a dictionary where each input is a batched Tensor, will be + passed to `augment_images`, `augment_labels` and `augment_bounding_boxes`, to + coodinate the randomness behavior, eg, in the RandomFlip layer, the image + and bounding_boxes should be changed in the same way. + + The `call()` method support two formats of inputs: + 1. Single image tensor with 3D (HWC) or 4D (NHWC) format. + 2. A dict of tensors with stable keys. The supported keys are: + `"images"`, `"labels"` and `"bounding_boxes"` at the moment. We might add + more keys in future when we support more types of augmentation. + + The output of the `call()` will be in two formats, which will be the same + structure as the inputs. + + The `call()` will handle the logic detecting the training/inference mode, + unpack the inputs, forward to the correct function, and pack the output back + to the same structure as the inputs. + + Note that since the randomness is also a common functionality, this layer + also includes a tf.keras.backend.RandomGenerator, which can be used to + produce the random numbers. The random number generator is stored in the + `self._random_generator` attribute. + """ + + def __init__(self, seed=None, **kwargs): + super().__init__(seed=seed, **kwargs) + + def augment_ragged_image(self, image, transformation, **kwargs): + """Augment an image from a ragged image batch during training. + + This method accepts a single Dense image Tensor, and returns a Dense image. + The resulting images are then stacked back into a ragged image batch. The + behavior of this method should be identical to that of `augment_images()` but + is to operate on a batch-wise basis. + + Args: + image: a single image from the batch + transformation: a single transformation sampled from + `get_random_transformations()`. + kwargs: all of the other call arguments (i.e. bounding_boxes, labels, etc.). + Returns: + Augmented image. + """ + raise NotImplementedError( + "A ragged image batch was passed to layer of type " + f"`{type(self).__name__}`. This layer does not implement " + "`augment_ragged_image()`. If this is a `keras_cv`, open a GitHub issue " + "requesting Ragged functionality on the layer titled: " + f"'`{type(self).__name__}`: ragged image support'. " + "If this is a custom layer, implement the `augment_ragged_image()` method." + ) + + def compute_ragged_image_signature(self, images): + """Computes the output image signature for the `augment_image()` function. + + Must be overridden to return tensors with different shapes than the input + images. By default returns either a `tf.RaggedTensorSpec` matching the input + image spec, or a `tf.TensorSpec` matching the input image spec. + """ + ragged_spec = tf.RaggedTensorSpec( + shape=images.shape[1:], + ragged_rank=1, + dtype=self.compute_dtype, + ) + return ragged_spec + + def augment_images(self, images, transformations, **kwargs): + """Augment a batch of images during training. + + Args: + image: 4D image input tensor to the layer. Forwarded from + `layer.call()`. This should generally have the shape [B, H, W, C]. + Forwarded from `layer.call()`. + transformations: The transformations object produced by + `get_random_transformations`. 
Used to coordinate the randomness + between image, label, bounding box, keypoints, and segmentation mask. + + Returns: + output 4D tensor, which will be forward to `layer.call()`. + """ + raise NotImplementedError() + + def augment_labels(self, labels, transformations, **kwargs): + """Augment a batch of labels during training. + + Args: + label: 2D label to the layer. Forwarded from `layer.call()`. + transformations: The transformations object produced by + `get_random_transformations`. Used to coordinate the randomness + between image, label, bounding box, keypoints, and segmentation mask. + + Returns: + output 2D tensor, which will be forward to `layer.call()`. + """ + raise NotImplementedError() + + def augment_targets(self, targets, transformations, **kwargs): + """Augment a batch of targets during training. + + Args: + target: 2D label to the layer. Forwarded from `layer.call()`. + transformations: The transformations object produced by + `get_random_transformations`. Used to coordinate the randomness + between image, label, bounding box, keypoints, and segmentation mask. + + Returns: + output 2D tensor, which will be forward to `layer.call()`. + """ + return self.augment_labels(targets, transformations) + + def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): + """Augment bounding boxes for one image during training. + + Args: + bounding_boxes: 3D bounding boxes to the layer. Forwarded from + `call()`. + transformations: The transformations object produced by + `get_random_transformations`. Used to coordinate the randomness + between image, label, bounding box, keypoints, and segmentation mask. + + Returns: + output 3D tensor, which will be forward to `layer.call()`. + """ + raise NotImplementedError() + + def augment_keypoints(self, keypoints, transformations, **kwargs): + """Augment a batch of keypoints for one image during training. + + Args: + keypoints: 3D keypoints input tensor to the layer. Forwarded from + `layer.call()`. Shape should be [batch, num_keypoints, 2] in the specified + keypoint format. + transformations: The transformations object produced by + `get_random_transformations`. Used to coordinate the randomness + between image, label, bounding box, keypoints, and segmentation mask. + + Returns: + output 3D tensor, which will be forward to `layer.call()`. + """ + raise NotImplementedError() + + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + """Augment a batch of images' segmentation masks during training. + + Args: + segmentation_mask: 4D segmentation mask input tensor to the layer. + This should generally have the shape [B, H, W, 1], or in some cases + [B, H, W, C] for multilabeled data. Forwarded from `layer.call()`. + transformations: The transformations object produced by + `get_random_transformations`. Used to coordinate the randomness + between image, label, bounding box, keypoints, and segmentation mask. + + Returns: + output 4D tensor containing the augmented segmentation mask, which will be forward to `layer.call()`. + """ + raise NotImplementedError() + + def get_random_transformation_batch( + self, + batch_size, + images=None, + labels=None, + bounding_boxes=None, + keypoints=None, + segmentation_masks=None, + ): + """Produce random transformations config for a batch of inputs. + + This is used to produce same randomness between + image/label/bounding_box. + + Args: + batch_size: the batch size of transformations configuration to sample. + image: 3D image tensor from inputs. 
+ label: optional 1D label tensor from inputs. + bounding_box: optional 2D bounding boxes tensor from inputs. + segmentation_mask: optional 3D segmentation mask tensor from inputs. + + Returns: + Any type of object, which will be forwarded to `augment_images`, + `augment_labels` and `augment_bounding_boxes` as the `transformations` + parameter. + """ + # Required to work with map_fn in the ragged cast. + return tf.zeros((batch_size)) + + def _unwrap_ragged_image_call(self, inputs): + images = inputs.get(IMAGES, None) + labels = inputs.get(LABELS, None) + bounding_boxes = inputs.get(BOUNDING_BOXES, None) + keypoints = inputs.get(KEYPOINTS, None) + segmentation_masks = inputs.get(SEGMENTATION_MASKS, None) + transformation = inputs.get("transformations") + images = images.to_tensor() + images = self.augment_ragged_image( + image=images, + label=labels, + bounding_boxes=bounding_boxes, + keypoints=keypoints, + segmentation_mask=segmentation_masks, + transformation=transformation, + ) + return tf.RaggedTensor.from_tensor(images) + + def _batch_augment(self, inputs): + images = inputs.get(IMAGES, None) + labels = inputs.get(LABELS, None) + bounding_boxes = inputs.get(BOUNDING_BOXES, None) + keypoints = inputs.get(KEYPOINTS, None) + segmentation_masks = inputs.get(SEGMENTATION_MASKS, None) + + batch_size = tf.shape(images)[0] + + transformations = self.get_random_transformation_batch( + batch_size, + images=images, + labels=labels, + bounding_boxes=bounding_boxes, + keypoints=keypoints, + segmentation_masks=segmentation_masks, + ) + + if isinstance(images, tf.RaggedTensor): + inputs_for_raggeds = {"transformations": transformations, **inputs} + print("inputs_for_raggeds", inputs_for_raggeds) + print("self._unwrap_ragged_image_call", self._unwrap_ragged_image_call) + images = tf.map_fn( + self._unwrap_ragged_image_call, + inputs_for_raggeds, + fn_output_signature=self.compute_ragged_image_signature(images), + ) + else: + images = self.augment_images( + images, + transformations=transformations, + bounding_boxes=bounding_boxes, + label=labels, + ) + + result = {IMAGES: images} + if labels is not None: + labels = self.augment_targets( + labels, + transformations=transformations, + bounding_boxes=bounding_boxes, + image=images, + ) + result[LABELS] = labels + + if bounding_boxes is not None: + bounding_boxes = self.augment_bounding_boxes( + bounding_boxes, + transformations=transformations, + labels=labels, + images=images, + ) + bounding_boxes = bounding_box.to_ragged(bounding_boxes) + result[BOUNDING_BOXES] = bounding_boxes + + if keypoints is not None: + keypoints = self.augment_keypoints( + keypoints, + transformations=transformations, + label=labels, + bounding_boxes=bounding_boxes, + images=images, + ) + result[KEYPOINTS] = keypoints + if segmentation_masks is not None: + segmentation_masks = self.augment_segmentation_masks( + segmentation_masks, + transformations=transformations, + ) + result[SEGMENTATION_MASKS] = segmentation_masks + + # preserve any additional inputs unmodified by this layer. + for key in inputs.keys() - result.keys(): + result[key] = inputs[key] + return result + + def call(self, inputs, training=True): + # TODO(lukewood): remove training=False behavior. 
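For orientation, a minimal sketch of a subclass of this base layer and of the two call formats described in the class docstring above; the layer name `AddConstant` and its values are illustrative only, not part of the patch:

```python
import tensorflow as tf

from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import (
    VectorizedBaseImageAugmentationLayer,
)


class AddConstant(VectorizedBaseImageAugmentationLayer):
    """Toy layer that adds a fixed value to every image in the batch."""

    def __init__(self, value=1.0, **kwargs):
        super().__init__(**kwargs)
        self.value = value

    def augment_images(self, images, transformations, **kwargs):
        return images + self.value

    def augment_labels(self, labels, transformations, **kwargs):
        return labels


layer = AddConstant(value=2.0)

# Format 1: a plain 3D (HWC) or 4D (NHWC) image tensor.
images = tf.zeros((4, 8, 8, 3))
print(layer(images).shape)  # (4, 8, 8, 3)

# Format 2: a dict with stable keys; unrecognized keys are passed through untouched.
outputs = layer({"images": images, "labels": tf.zeros((4, 3))})
print(sorted(outputs.keys()))  # ['images', 'labels']
```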
+ inputs = self._ensure_inputs_are_compute_dtype(inputs) + if training: + inputs, metadata = self._format_inputs(inputs) + images = inputs[IMAGES] + if images.shape.rank == 3 or images.shape.rank == 4: + return self._format_output(self._batch_augment(inputs), metadata) + else: + raise ValueError( + "Image augmentation layers are expecting inputs to be " + "rank 3 (HWC) or 4D (NHWC) tensors. Got shape: " + f"{images.shape}" + ) + else: + return inputs + + def _format_inputs(self, inputs): + metadata = {IS_DICT: True, USE_TARGETS: False} + if tf.is_tensor(inputs): + # single image input tensor + metadata[IS_DICT] = False + inputs = {IMAGES: inputs} + + metadata[BATCHED] = inputs["images"].shape.rank == 4 + if inputs["images"].shape.rank == 3: + for key in list(inputs.keys()): + inputs[key] = tf.expand_dims(inputs[key], axis=0) + + if not isinstance(inputs, dict): + raise ValueError( + f"Expect the inputs to be image tensor or dict. Got inputs={inputs}" + ) + + if BOUNDING_BOXES in inputs: + inputs[BOUNDING_BOXES] = self._format_bounding_boxes(inputs[BOUNDING_BOXES]) + + if isinstance(inputs, dict) and TARGETS in inputs: + # TODO(scottzhu): Check if it only contains the valid keys + inputs[LABELS] = inputs[TARGETS] + del inputs[TARGETS] + metadata[USE_TARGETS] = True + return inputs, metadata + + return inputs, metadata + + def _format_output(self, output, metadata): + if not metadata[BATCHED]: + for key in list(output.keys()): + output[key] = tf.squeeze(output[key], axis=0) + + if not metadata[IS_DICT]: + return output[IMAGES] + elif metadata[USE_TARGETS]: + output[TARGETS] = output[LABELS] + del output[LABELS] + return output + + def _ensure_inputs_are_compute_dtype(self, inputs): + if not isinstance(inputs, dict): + return preprocessing.ensure_tensor( + inputs, + self.compute_dtype, + ) + inputs[IMAGES] = preprocessing.ensure_tensor( + inputs[IMAGES], + self.compute_dtype, + ) + if BOUNDING_BOXES in inputs: + inputs[BOUNDING_BOXES]["boxes"] = preprocessing.ensure_tensor( + inputs[BOUNDING_BOXES]["boxes"], + self.compute_dtype, + ) + inputs[BOUNDING_BOXES]["classes"] = preprocessing.ensure_tensor( + inputs[BOUNDING_BOXES]["classes"], + self.compute_dtype, + ) + return inputs + + def _format_bounding_boxes(self, bounding_boxes): + # We can't catch the case where this is None, sometimes RaggedTensor drops this + # dimension + if "classes" not in bounding_boxes: + raise ValueError( + "Bounding boxes are missing class_id. If you would like to pad the " + "bounding boxes with class_id, use: " + "`bounding_boxes['classes'] = tf.ones_like(bounding_boxes['boxes'])`." + ) + return bounding_boxes diff --git a/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py new file mode 100644 index 0000000000..6f473cb8b7 --- /dev/null +++ b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py @@ -0,0 +1,263 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import tensorflow as tf + +from keras_cv import bounding_box +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, +) + + +class VectorizedRandomAddLayer(VectorizedBaseImageAugmentationLayer): + def __init__(self, add_range=(0.0, 1.0), fixed_value=None, **kwargs): + super().__init__(**kwargs) + self.add_range = add_range + self.fixed_value = fixed_value + + def augment_ragged_image(self, image, transformation, **kwargs): + return image + transformation[None, None] + + def get_random_transformation_batch(self, batch_size, **kwargs): + if self.fixed_value: + return tf.ones((batch_size,)) * self.fixed_value + return self._random_generator.random_uniform( + (batch_size,), minval=self.add_range[0], maxval=self.add_range[1] + ) + + def augment_images(self, images, transformations, **kwargs): + return images + transformations[:, None, None, None] + + def augment_labels(self, labels, transformations, **kwargs): + return labels + transformations[:, None] + + def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): + return { + "boxes": bounding_boxes["boxes"] + transformations[:, None, None], + "classes": bounding_boxes["classes"] + transformations[:, None], + } + + def augment_keypoints(self, keypoints, transformations, **kwargs): + return keypoints + transformations[:, None, None] + + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + return segmentation_masks + transformations[:, None, None, None] + + +class VectorizedBaseImageAugmentationLayerTest(tf.test.TestCase): + def test_augment_single_image(self): + add_layer = VectorizedRandomAddLayer(fixed_value=2.0) + image = np.random.random(size=(8, 8, 3)).astype("float32") + output = add_layer(image) + + self.assertAllClose(image + 2.0, output) + + def test_augment_dict_return_type(self): + add_layer = VectorizedRandomAddLayer(fixed_value=2.0) + image = np.random.random(size=(8, 8, 3)).astype("float32") + output = add_layer({"images": image}) + + self.assertIsInstance(output, dict) + + def test_augment_casts_dtypes(self): + add_layer = VectorizedRandomAddLayer(fixed_value=2.0) + images = tf.ones((2, 8, 8, 3), dtype="uint8") + output = add_layer(images) + + self.assertAllClose(tf.ones((2, 8, 8, 3), dtype="float32") * 3.0, output) + + def test_augment_batch_images(self): + add_layer = VectorizedRandomAddLayer() + images = np.random.random(size=(2, 8, 8, 3)).astype("float32") + output = add_layer(images) + + diff = output - images + # Make sure the first image and second image get different augmentation + self.assertNotAllClose(diff[0], diff[1]) + + def test_augment_image_and_label(self): + add_layer = VectorizedRandomAddLayer(fixed_value=2.0) + image = np.random.random(size=(8, 8, 3)).astype("float32") + label = np.random.random(size=(1,)).astype("float32") + + output = add_layer({"images": image, "targets": label}) + expected_output = {"images": image + 2.0, "targets": label + 2.0} + self.assertAllClose(output, expected_output) + + def test_augment_image_and_target(self): + add_layer = VectorizedRandomAddLayer(fixed_value=2.0) + image = np.random.random(size=(8, 8, 3)).astype("float32") + label = np.random.random(size=(1,)).astype("float32") + + output = add_layer({"images": image, "targets": label}) + expected_output = {"images": image + 2.0, "targets": label + 2.0} + self.assertAllClose(output, 
expected_output) + + def test_augment_batch_images_and_targets(self): + add_layer = VectorizedRandomAddLayer() + images = np.random.random(size=(2, 8, 8, 3)).astype("float32") + targets = np.random.random(size=(2, 1)).astype("float32") + output = add_layer({"images": images, "targets": targets}) + + image_diff = output["images"] - images + label_diff = output["targets"] - targets + # Make sure the first image and second image get different augmentation + self.assertNotAllClose(image_diff[0], image_diff[1]) + self.assertNotAllClose(label_diff[0], label_diff[1]) + + def test_augment_leaves_extra_dict_entries_unmodified(self): + add_layer = VectorizedRandomAddLayer(fixed_value=0.5) + images = np.random.random(size=(8, 8, 3)).astype("float32") + filenames = tf.constant("/path/to/first.jpg") + inputs = {"images": images, "filenames": filenames} + _ = add_layer(inputs) + + def test_augment_ragged_images(self): + images = tf.ragged.stack( + [ + np.random.random(size=(8, 8, 3)).astype("float32"), + np.random.random(size=(16, 8, 3)).astype("float32"), + ] + ) + add_layer = VectorizedRandomAddLayer(fixed_value=0.5) + result = add_layer(images) + self.assertAllClose(images + 0.5, result) + + def test_augment_image_and_localization_data(self): + add_layer = VectorizedRandomAddLayer(fixed_value=2.0) + images = np.random.random(size=(8, 8, 8, 3)).astype("float32") + bounding_boxes = { + "boxes": np.random.random(size=(8, 3, 4)).astype("float32"), + "classes": np.random.random(size=(8, 3)).astype("float32"), + } + keypoints = np.random.random(size=(8, 5, 2)).astype("float32") + segmentation_mask = np.random.random(size=(8, 8, 8, 1)).astype("float32") + + output = add_layer( + { + "images": images, + "bounding_boxes": bounding_boxes, + "keypoints": keypoints, + "segmentation_masks": segmentation_mask, + } + ) + expected_output = { + "images": images + 2.0, + "bounding_boxes": bounding_box.to_dense( + { + "boxes": bounding_boxes["boxes"] + 2.0, + "classes": bounding_boxes["classes"] + 2.0, + } + ), + "keypoints": keypoints + 2.0, + "segmentation_masks": segmentation_mask + 2.0, + } + + output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + + self.assertAllClose(output["images"], expected_output["images"]) + self.assertAllClose(output["keypoints"], expected_output["keypoints"]) + self.assertAllClose( + output["bounding_boxes"]["boxes"], + expected_output["bounding_boxes"]["boxes"], + ) + self.assertAllClose( + output["bounding_boxes"]["classes"], + expected_output["bounding_boxes"]["classes"], + ) + self.assertAllClose( + output["segmentation_masks"], expected_output["segmentation_masks"] + ) + + def test_augment_batch_image_and_localization_data(self): + add_layer = VectorizedRandomAddLayer() + images = np.random.random(size=(2, 8, 8, 3)).astype("float32") + bounding_boxes = { + "boxes": np.random.random(size=(2, 3, 4)).astype("float32"), + "classes": np.random.random(size=(2, 3)).astype("float32"), + } + keypoints = np.random.random(size=(2, 5, 2)).astype("float32") + segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32") + + output = add_layer( + { + "images": images, + "bounding_boxes": bounding_boxes, + "keypoints": keypoints, + "segmentation_masks": segmentation_masks, + } + ) + + bounding_boxes_diff = ( + output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] + ) + keypoints_diff = output["keypoints"] - keypoints + segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + self.assertNotAllClose(bounding_boxes_diff[0], 
bounding_boxes_diff[1]) + self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) + self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) + + @tf.function + def in_tf_function(inputs): + return add_layer(inputs) + + output = in_tf_function( + { + "images": images, + "bounding_boxes": bounding_boxes, + "keypoints": keypoints, + "segmentation_masks": segmentation_masks, + } + ) + + bounding_boxes_diff = ( + output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] + ) + keypoints_diff = output["keypoints"] - keypoints + segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1]) + self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) + self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) + + def test_augment_all_data_in_tf_function(self): + add_layer = VectorizedRandomAddLayer() + images = np.random.random(size=(2, 8, 8, 3)).astype("float32") + bounding_boxes = bounding_boxes = { + "boxes": np.random.random(size=(2, 3, 4)).astype("float32"), + "classes": np.random.random(size=(2, 3)).astype("float32"), + } + keypoints = np.random.random(size=(2, 5, 2)).astype("float32") + segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32") + + @tf.function + def in_tf_function(inputs): + return add_layer(inputs) + + output = in_tf_function( + { + "images": images, + "bounding_boxes": bounding_boxes, + "keypoints": keypoints, + "segmentation_masks": segmentation_masks, + } + ) + + bounding_boxes_diff = ( + output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] + ) + keypoints_diff = output["keypoints"] - keypoints + segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1]) + self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) + self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) From 4b08a7ddc26a4140a60ed53471034f0991f5dc44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Marcos=20Cardoso=20da=20Silva?= <40339986+JoaoMarcosCSilva@users.noreply.github.com> Date: Thu, 9 Feb 2023 01:12:00 -0300 Subject: [PATCH 13/27] Fix error in `_sample_from_beta` and `cutmix` (#1369) * Fix bug in the _sample_from_beta functions * Fix calculation of cut_width in the CutMix class --- keras_cv/layers/preprocessing/aug_mix.py | 4 ++-- keras_cv/layers/preprocessing/cut_mix.py | 6 +++--- keras_cv/layers/preprocessing/fourier_mix.py | 4 ++-- keras_cv/layers/preprocessing/mix_up.py | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/keras_cv/layers/preprocessing/aug_mix.py b/keras_cv/layers/preprocessing/aug_mix.py index 4940f501e0..f3a15a6815 100644 --- a/keras_cv/layers/preprocessing/aug_mix.py +++ b/keras_cv/layers/preprocessing/aug_mix.py @@ -106,10 +106,10 @@ def _sample_from_dirichlet(self, alpha): def _sample_from_beta(self, alpha, beta): sample_alpha = tf.random.gamma( - (), 1.0, beta=alpha, seed=self._random_generator.make_legacy_seed() + (), alpha=alpha, seed=self._random_generator.make_legacy_seed() ) sample_beta = tf.random.gamma( - (), 1.0, beta=beta, seed=self._random_generator.make_legacy_seed() + (), alpha=beta, seed=self._random_generator.make_legacy_seed() ) return sample_alpha / (sample_alpha + sample_beta) diff --git a/keras_cv/layers/preprocessing/cut_mix.py b/keras_cv/layers/preprocessing/cut_mix.py index c04e8ce5e7..4ec1c328dc 100644 --- a/keras_cv/layers/preprocessing/cut_mix.py +++ 
b/keras_cv/layers/preprocessing/cut_mix.py @@ -50,10 +50,10 @@ def __init__(self, alpha=1.0, seed=None, **kwargs): def _sample_from_beta(self, alpha, beta, shape): sample_alpha = tf.random.gamma( - shape, 1.0, beta=alpha, seed=self._random_generator.make_legacy_seed() + shape, alpha=alpha, seed=self._random_generator.make_legacy_seed() ) sample_beta = tf.random.gamma( - shape, 1.0, beta=beta, seed=self._random_generator.make_legacy_seed() + shape, alpha=beta, seed=self._random_generator.make_legacy_seed() ) return sample_alpha / (sample_alpha + sample_beta) @@ -98,7 +98,7 @@ def _cutmix(self, images, labels): ratio * tf.cast(image_height, dtype=tf.float32), dtype=tf.int32 ) cut_width = tf.cast( - ratio * tf.cast(image_height, dtype=tf.float32), dtype=tf.int32 + ratio * tf.cast(image_width, dtype=tf.float32), dtype=tf.int32 ) random_center_height = tf.random.uniform( diff --git a/keras_cv/layers/preprocessing/fourier_mix.py b/keras_cv/layers/preprocessing/fourier_mix.py index 43ac3ac83e..1b92f25bd7 100644 --- a/keras_cv/layers/preprocessing/fourier_mix.py +++ b/keras_cv/layers/preprocessing/fourier_mix.py @@ -50,10 +50,10 @@ def __init__(self, alpha=0.5, decay_power=3, seed=None, **kwargs): def _sample_from_beta(self, alpha, beta, shape): sample_alpha = tf.random.gamma( - shape, 1.0, beta=alpha, seed=self._random_generator.make_legacy_seed() + shape, alpha=alpha, seed=self._random_generator.make_legacy_seed() ) sample_beta = tf.random.gamma( - shape, 1.0, beta=beta, seed=self._random_generator.make_legacy_seed() + shape, alpha=beta, seed=self._random_generator.make_legacy_seed() ) return sample_alpha / (sample_alpha + sample_beta) diff --git a/keras_cv/layers/preprocessing/mix_up.py b/keras_cv/layers/preprocessing/mix_up.py index cd59f04264..819f31fea7 100644 --- a/keras_cv/layers/preprocessing/mix_up.py +++ b/keras_cv/layers/preprocessing/mix_up.py @@ -53,10 +53,10 @@ def __init__(self, alpha=0.2, seed=None, **kwargs): def _sample_from_beta(self, alpha, beta, shape): sample_alpha = tf.random.gamma( - shape, 1.0, beta=alpha, seed=self._random_generator.make_legacy_seed() + shape, alpha=alpha, seed=self._random_generator.make_legacy_seed() ) sample_beta = tf.random.gamma( - shape, 1.0, beta=beta, seed=self._random_generator.make_legacy_seed() + shape, alpha=beta, seed=self._random_generator.make_legacy_seed() ) return sample_alpha / (sample_alpha + sample_beta) From 7eefa1eee176a7ebb960490d0ed9aa2b6020a58a Mon Sep 17 00:00:00 2001 From: Jonathan Bischof Date: Thu, 9 Feb 2023 21:03:19 +0000 Subject: [PATCH 14/27] Improve quickstart user experience (#1379) * Consolidate quickstart and add install guide * Add validation set and prefetch. 
Fix batch size * Change comment order --- README.md | 86 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 2761bb2e13..0275a8e1b3 100644 --- a/README.md +++ b/README.md @@ -31,9 +31,22 @@ To learn more about the future project direction, please check the [roadmap](.gi - [Roadmap](.github/ROADMAP.md) - [API Design Guidelines](.github/API_DESIGN.md) -## Quickstart +## Installation + +To install the latest official release: + +``` +pip install keras-cv tensorflow --upgrade +``` -Create a preprocessing pipeline: +To install the latest unreleased changes to the library, we recommend using +pip to install directly from the master branch on github: + +``` +pip install git+https://github.com/keras-team/keras-cv.git tensorflow --upgrade +``` + +## Quickstart ```python import keras_cv @@ -41,49 +54,50 @@ import tensorflow as tf from tensorflow import keras import tensorflow_datasets as tfds +# Create a preprocessing pipeline augmenter = keras_cv.layers.Augmenter( - layers=[ - keras_cv.layers.RandomFlip(), - keras_cv.layers.RandAugment(value_range=(0, 255)), - keras_cv.layers.CutMix(), - keras_cv.layers.MixUp() + layers=[ + keras_cv.layers.RandomFlip(), + keras_cv.layers.RandAugment(value_range=(0, 255)), + keras_cv.layers.CutMix(), + keras_cv.layers.MixUp() ] ) -def augment_data(images, labels): - labels = tf.one_hot(labels, 3) - inputs = {"images": images, "labels": labels} - outputs = augmenter(inputs) - return outputs['images'], outputs['labels'] -``` - -Augment a `tf.data.Dataset`: - -```python -dataset = tfds.load('rock_paper_scissors', as_supervised=True, split='train') -dataset = dataset.batch(64) -dataset = dataset.map(augment_data, num_parallel_calls=tf.data.AUTOTUNE) -``` - -Create a model: - -```python +def preprocess_data(images, labels, augment=False): + labels = tf.one_hot(labels, 3) + inputs = {"images": images, "labels": labels} + outputs = augmenter(inputs) if augment else inputs + return outputs['images'], outputs['labels'] + +# Augment a `tf.data.Dataset` +train_dataset, test_dataset = tfds.load( + 'rock_paper_scissors', + as_supervised=True, + split=['train', 'test'], +) +train_dataset = train_dataset.batch(16).map( + lambda x, y: preprocess_data(x, y, augment=True), + num_parallel_calls=tf.data.AUTOTUNE).prefetch( + tf.data.AUTOTUNE) +test_dataset = test_dataset.batch(16).map( + preprocess_data, num_parallel_calls=tf.data.AUTOTUNE).prefetch( + tf.data.AUTOTUNE) + +# Create a model densenet = keras_cv.models.DenseNet121( - include_rescaling=True, - include_top=True, - classes=3 + include_rescaling=True, + include_top=True, + classes=3 ) densenet.compile( - loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy'] + loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy'] ) -``` -Train your model: - -```python -densenet.fit(dataset) +# Train your model +densenet.fit(train_dataset, validation_data=test_dataset) ``` ## Contributors From 24fe395229f73d94834b63b9b453e296d863a4af Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Thu, 9 Feb 2023 22:29:18 -0500 Subject: [PATCH 15/27] Migrate KerasCV WaymoOpendata components to use conditional imports (#1362) * Fix imports * Fix imports * Fix pointcloud import names * Add parenthesis * Add conditional imports --- .../callbacks/waymo_evaluation_callback.py | 15 ++++-- .../waymo_evaluation_callback_test.py | 5 +- keras_cv/datasets/waymo/__init__.py | 9 ++-- keras_cv/datasets/waymo/load.py | 10 +++- 
keras_cv/datasets/waymo/transformer.py | 49 +++++++++++-------- keras_cv/utils/__init__.py | 1 + keras_cv/utils/conditional_imports.py | 29 +++++++++++ 7 files changed, 83 insertions(+), 35 deletions(-) create mode 100644 keras_cv/utils/conditional_imports.py diff --git a/keras_cv/callbacks/waymo_evaluation_callback.py b/keras_cv/callbacks/waymo_evaluation_callback.py index 1162606b03..50d748298b 100644 --- a/keras_cv/callbacks/waymo_evaluation_callback.py +++ b/keras_cv/callbacks/waymo_evaluation_callback.py @@ -13,9 +13,15 @@ # limitations under the License. import tensorflow as tf from keras.callbacks import Callback -from waymo_open_dataset.metrics.python.wod_detection_evaluator import ( - WODDetectionEvaluator, -) + +from keras_cv.utils import assert_waymo_open_dataset_installed + +try: + from waymo_open_dataset.metrics.python.wod_detection_evaluator import ( + WODDetectionEvaluator, + ) +except ImportError: + WODDetectionEvaluator = None from keras_cv.bounding_box_3d import CENTER_XYZ_DXDYDZ_PHI @@ -32,6 +38,9 @@ def __init__(self, validation_data, config=None, **kwargs): config: an optional `metrics_pb2.Config` object from WOD to specify what metrics should be evaluated. """ + assert_waymo_open_dataset_installed( + "keras_cv.callbacks.WaymoEvaluationCallback()" + ) self.model = None self.val_data = validation_data self.evaluator = WODDetectionEvaluator(config=config) diff --git a/keras_cv/callbacks/waymo_evaluation_callback_test.py b/keras_cv/callbacks/waymo_evaluation_callback_test.py index e5a7bbdeb1..2d9d85ac26 100644 --- a/keras_cv/callbacks/waymo_evaluation_callback_test.py +++ b/keras_cv/callbacks/waymo_evaluation_callback_test.py @@ -15,10 +15,7 @@ import pytest import tensorflow as tf -try: - from keras_cv.callbacks import WaymoEvaluationCallback -except ImportError: - pass +from keras_cv.callbacks import WaymoEvaluationCallback NUM_RECORDS = 10 POINT_FEATURES = 3 diff --git a/keras_cv/datasets/waymo/__init__.py b/keras_cv/datasets/waymo/__init__.py index 10d3d08a31..df868ec58a 100644 --- a/keras_cv/datasets/waymo/__init__.py +++ b/keras_cv/datasets/waymo/__init__.py @@ -14,9 +14,6 @@ # Following symbols are only available when Waymo Open Dataset dependencies are # installed. 
-try: - from keras_cv.datasets.waymo.load import load - from keras_cv.datasets.waymo.transformer import build_tensors_for_augmentation - from keras_cv.datasets.waymo.transformer import build_tensors_from_wod_frame -except ImportError: - pass +from keras_cv.datasets.waymo.load import load +from keras_cv.datasets.waymo.transformer import build_tensors_for_augmentation +from keras_cv.datasets.waymo.transformer import build_tensors_from_wod_frame diff --git a/keras_cv/datasets/waymo/load.py b/keras_cv/datasets/waymo/load.py index 5004547d24..4ef801019f 100644 --- a/keras_cv/datasets/waymo/load.py +++ b/keras_cv/datasets/waymo/load.py @@ -16,15 +16,20 @@ import tensorflow as tf import tensorflow_datasets as tfds -from waymo_open_dataset import dataset_pb2 from keras_cv.datasets.waymo import transformer +from keras_cv.utils import assert_waymo_open_dataset_installed + +try: + import waymo_open_dataset +except ImportError: + waymo_open_dataset = None def _generate_frames(segments, transformer): def _generator(): for record in tfds.as_numpy(segments): - frame = dataset_pb2.Frame() + frame = waymo_open_dataset.dataset_pb2.Frame() frame.ParseFromString(record) yield transformer(frame) @@ -67,6 +72,7 @@ def simple_transformer(frame): load("/path/to/tfrecords", simple_transformer, output_signature) ``` """ + assert_waymo_open_dataset_installed("keras_cv.datasets.waymo.load()") if type(tfrecord_path) == list: filenames = tfrecord_path else: diff --git a/keras_cv/datasets/waymo/transformer.py b/keras_cv/datasets/waymo/transformer.py index 776ef93750..a8dc3ad96d 100644 --- a/keras_cv/datasets/waymo/transformer.py +++ b/keras_cv/datasets/waymo/transformer.py @@ -20,11 +20,17 @@ import numpy as np import tensorflow as tf -from waymo_open_dataset import dataset_pb2 -from waymo_open_dataset.utils import box_utils -from waymo_open_dataset.utils import frame_utils -from waymo_open_dataset.utils import range_image_utils -from waymo_open_dataset.utils import transform_utils + +from keras_cv.utils import assert_waymo_open_dataset_installed + +try: + from waymo_open_dataset import dataset_pb2 + from waymo_open_dataset.utils import box_utils + from waymo_open_dataset.utils import frame_utils + from waymo_open_dataset.utils import range_image_utils + from waymo_open_dataset.utils import transform_utils +except ImportError: + waymo_open_dataset = None from keras_cv.datasets.waymo import struct from keras_cv.layers.object_detection_3d import voxel_utils @@ -55,7 +61,7 @@ _MAX_NUM_NON_TOP_LIDAR_POINTS = 30000 -def _decode_range_images(frame: dataset_pb2.Frame) -> Dict[int, List[tf.Tensor]]: +def _decode_range_images(frame) -> Dict[int, List[tf.Tensor]]: """Decodes range images from a Waymo Open Dataset frame. Please refer to https://arxiv.org/pdf/1912.04838.pdf for more details. @@ -92,7 +98,7 @@ def _decode_range_images(frame: dataset_pb2.Frame) -> Dict[int, List[tf.Tensor]] return range_images -def _get_range_image_top_pose(frame: dataset_pb2.Frame) -> tf.Tensor: +def _get_range_image_top_pose(frame) -> tf.Tensor: """Extracts range image pose tensor. Args: @@ -118,7 +124,7 @@ def _get_range_image_top_pose(frame: dataset_pb2.Frame) -> tf.Tensor: def _get_point_top_lidar( - range_image: Sequence[tf.Tensor], frame: dataset_pb2.Frame + range_image: Sequence[tf.Tensor], frame ) -> struct.PointTensors: """Gets point related tensors for the top lidar. 
@@ -214,9 +220,7 @@ def _get_point_top_lidar( ) -def _get_lidar_calibration( - frame: dataset_pb2.Frame, name: int -) -> dataset_pb2.LaserCalibration: +def _get_lidar_calibration(frame, name: int): """Gets lidar calibration for a given lidar.""" calibration = None for c in frame.context.laser_calibrations: @@ -244,7 +248,7 @@ def _gather(t: tf.Tensor) -> tf.Tensor: def _get_point_lidar( ris: Dict[int, List[tf.Tensor]], - frame: dataset_pb2.Frame, + frame, max_num_points: int, ) -> struct.PointTensors: """Gets point related tensors for non top lidar. @@ -319,9 +323,7 @@ def _get_point_lidar( return point_tensors -def _get_point( - frame: dataset_pb2.Frame, max_num_lidar_points: int -) -> struct.PointTensors: +def _get_point(frame, max_num_lidar_points: int) -> struct.PointTensors: """Gets point related tensors from a Waymo Open Dataset frame. Args: @@ -348,7 +350,7 @@ def _get_point( def _get_point_label_box( - frame: dataset_pb2.Frame, + frame, ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]: """Extracts 3D box labels from a Waymo Open Dataset frame. @@ -448,9 +450,7 @@ def _get_box_class_per_point( return point_box_class -def _get_point_label( - frame: dataset_pb2.Frame, point_xyz: tf.Tensor -) -> struct.LabelTensors: +def _get_point_label(frame, point_xyz: tf.Tensor) -> struct.LabelTensors: """Extracts labels. Args: @@ -564,7 +564,7 @@ def _box_3d_global_to_vehicle(box_3d: tf.Tensor, sdc_pose: tf.Tensor) -> tf.Tens return tf.concat([new_center, dim, new_heading[..., tf.newaxis]], axis=-1) -def build_tensors_from_wod_frame(frame: dataset_pb2.Frame) -> Dict[str, tf.Tensor]: +def build_tensors_from_wod_frame(frame) -> Dict[str, tf.Tensor]: """Builds tensors from a Waymo Open Dataset frame. This function is to convert range image to point cloud. User can also work with @@ -576,6 +576,9 @@ def build_tensors_from_wod_frame(frame: dataset_pb2.Frame) -> Dict[str, tf.Tenso Returns: Flat dictionary of tensors. """ + assert_waymo_open_dataset_installed( + "keras_cv.datasets.waymo.build_tensors_from_wod_frame()" + ) frame_id_bytes = "{}_{}".format(frame.context.name, frame.timestamp_micros).encode( encoding="ascii" @@ -678,6 +681,9 @@ def transform_to_vehicle_frame(frame: Dict[str, tf.Tensor]) -> Dict[str, tf.Tens Returns: A dictionary of feature tensors in vehicle frame. """ + assert_waymo_open_dataset_installed( + "keras_cv.datasets.waymo.transform_to_vehicle_frame()" + ) def _transform_to_vehicle_frame( point_global_xyz: tf.Tensor, @@ -726,6 +732,9 @@ def build_tensors_for_augmentation( and values which are tensors of shapes [num points, num features] and [num boxes, num features]). """ + assert_waymo_open_dataset_installed( + "keras_cv.datasets.waymo.build_tensors_for_augmentation()" + ) point_cloud = tf.concat( [ frame["point_xyz"][tf.newaxis, ...], diff --git a/keras_cv/utils/__init__.py b/keras_cv/utils/__init__.py index e4e601fadb..046db85c17 100644 --- a/keras_cv/utils/__init__.py +++ b/keras_cv/utils/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. 
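The import added to `keras_cv/utils/__init__.py` just below re-exports the new `assert_waymo_open_dataset_installed` helper, so code paths that need the optional dependency can fail fast with install instructions. A sketch of how a call site is expected to use it once this patch is applied; the function name here is hypothetical:

```python
from keras_cv.utils import assert_waymo_open_dataset_installed


def my_waymo_dependent_utility():
    # Raises ImportError with install instructions if waymo-open-dataset is absent.
    assert_waymo_open_dataset_installed("my_waymo_dependent_utility()")
    return "waymo-open-dataset is available"


try:
    print(my_waymo_dependent_utility())
except ImportError as err:
    print(err)
```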
from keras_cv.utils import test_utils +from keras_cv.utils.conditional_imports import assert_waymo_open_dataset_installed from keras_cv.utils.fill_utils import fill_rectangle from keras_cv.utils.preprocessing import blend from keras_cv.utils.preprocessing import ensure_tensor diff --git a/keras_cv/utils/conditional_imports.py b/keras_cv/utils/conditional_imports.py new file mode 100644 index 0000000000..4987b6b00c --- /dev/null +++ b/keras_cv/utils/conditional_imports.py @@ -0,0 +1,29 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + import waymo_open_dataset +except ImportError: + waymo_open_dataset = None + + +def assert_waymo_open_dataset_installed(symbol_name): + if waymo_open_dataset is None: + raise ImportError( + f"{symbol_name} requires the `waymo-open-dataset-tf` package. " + "Please install the package from source. " + "Installation instructions can be found at " + "https://github.com/waymo-research/waymo-open-dataset" + "/blob/master/docs/quick_start.md" + ) From fc3bea368db0af1ee9b985378b0dde76c8c906a2 Mon Sep 17 00:00:00 2001 From: Jonathan Bischof Date: Fri, 10 Feb 2023 19:34:25 +0000 Subject: [PATCH 16/27] Test model serialization (#1367) * Test model serialization * Format * Add tests for `keras_v3` file format * Use named parameters --- keras_cv/models/models_test.py | 14 ++++++++++++++ keras_cv/models/resnet_v2_test.py | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/keras_cv/models/models_test.py b/keras_cv/models/models_test.py index a05d2f2fed..52fa1d361c 100644 --- a/keras_cv/models/models_test.py +++ b/keras_cv/models/models_test.py @@ -13,6 +13,8 @@ # limitations under the License. """Integration tests for KerasCV models.""" +import os + import pytest import tensorflow as tf from tensorflow import keras @@ -156,6 +158,18 @@ def _test_model_can_be_used_as_backbone(self, app, last_dim, args): model = keras.Model(inputs=inputs, outputs=[backbone_output]) model.compile() + def _test_model_serialization(self, app, _, args, save_format, filename): + model = app(include_rescaling=True, include_top=False, **args) + input_batch = tf.ones(shape=(16, 224, 224, 3)) + model_output = model(input_batch) + save_path = os.path.join(self.get_temp_dir(), filename) + model.save(save_path, save_format=save_format) + restored_model = keras.models.load_model(save_path) + + # Check that output matches. 
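A standalone sketch of the save/load round trip these tests exercise, assuming a `DenseNet121` backbone and a writable temporary directory; the model choice, path, and batch size are illustrative:

```python
import os
import tempfile

import tensorflow as tf
from tensorflow import keras

import keras_cv

model = keras_cv.models.DenseNet121(include_rescaling=True, include_top=False)
batch = tf.ones((2, 224, 224, 3))
original_output = model(batch)

# The new Keras v3 format; `save_format="tf"` with a plain directory also works.
path = os.path.join(tempfile.mkdtemp(), "model.keras")
model.save(path, save_format="keras_v3")

restored = keras.models.load_model(path)
tf.debugging.assert_near(original_output, restored(batch))
```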
+ restored_output = restored_model(input_batch) + self.assertAllClose(model_output, restored_output) + if __name__ == "__main__": tf.test.main() diff --git a/keras_cv/models/resnet_v2_test.py b/keras_cv/models/resnet_v2_test.py index 82244de5dd..d890c1241f 100644 --- a/keras_cv/models/resnet_v2_test.py +++ b/keras_cv/models/resnet_v2_test.py @@ -54,6 +54,15 @@ def test_application_variable_input_channels(self, app, last_dim, args): def test_model_can_be_used_as_backbone(self, app, last_dim, args): super()._test_model_can_be_used_as_backbone(app, last_dim, args) + @parameterized.parameters(*MODEL_LIST) + def test_model_serialization(self, app, last_dim, args): + super()._test_model_serialization( + app, last_dim, args, save_format="tf", filename="model" + ) + super()._test_model_serialization( + app, last_dim, args, save_format="keras_v3", filename="model.keras" + ) + def test_model_backbone_layer_names_stability(self): model = resnet_v2.ResNet50V2( include_rescaling=False, From c8649b48119e8af398a58217d7d5b677fa941515 Mon Sep 17 00:00:00 2001 From: Neel Kovelamudi <60985914+nkovela1@users.noreply.github.com> Date: Fri, 10 Feb 2023 12:05:48 -0800 Subject: [PATCH 17/27] Preparation for switch in Keras serialization format (#1374) * Refactoring deserialization of KerasCV objects in preparation for Keras serialization format switch * Modified deserialization to be backward-compatible with config check * Fixed backwards compatibility for transformer encoder serialization * Added comment on keras.__internal__ namespace support in future --- .../core/factor_sampler/constant_factor_sampler.py | 4 ++++ .../core/factor_sampler/normal_factor_sampler.py | 4 ++++ .../core/factor_sampler/uniform_factor_sampler.py | 4 ++++ keras_cv/layers/preprocessing/augmenter.py | 6 ++++++ .../preprocessing/random_augmentation_pipeline.py | 9 +++++++++ .../preprocessing/random_color_degeneration.py | 6 ++++++ .../layers/preprocessing/random_crop_and_resize.py | 12 ++++++++++++ keras_cv/layers/preprocessing/random_saturation.py | 6 ++++++ .../layers/preprocessing/randomly_zoomed_crop.py | 12 ++++++++++++ .../layers/preprocessing/repeated_augmentation.py | 8 ++++++++ keras_cv/layers/preprocessing/solarization.py | 12 ++++++++++++ keras_cv/layers/regularization/squeeze_excite.py | 12 ++++++++++++ keras_cv/layers/transformer_encoder.py | 8 ++++++-- keras_cv/models/csp_darknet_test.py | 2 ++ keras_cv/utils/preprocessing.py | 6 ++++++ 15 files changed, 109 insertions(+), 2 deletions(-) diff --git a/keras_cv/core/factor_sampler/constant_factor_sampler.py b/keras_cv/core/factor_sampler/constant_factor_sampler.py index 8d8cdb8863..2e7aee9baf 100644 --- a/keras_cv/core/factor_sampler/constant_factor_sampler.py +++ b/keras_cv/core/factor_sampler/constant_factor_sampler.py @@ -42,3 +42,7 @@ def __call__(self, shape=(), dtype="float32"): def get_config(self): return {"value": self.value} + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/keras_cv/core/factor_sampler/normal_factor_sampler.py b/keras_cv/core/factor_sampler/normal_factor_sampler.py index 95243372a5..26a03e07f6 100644 --- a/keras_cv/core/factor_sampler/normal_factor_sampler.py +++ b/keras_cv/core/factor_sampler/normal_factor_sampler.py @@ -71,3 +71,7 @@ def get_config(self): "max_value": self.max_value, "seed": self.seed, } + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/keras_cv/core/factor_sampler/uniform_factor_sampler.py b/keras_cv/core/factor_sampler/uniform_factor_sampler.py index 
9a4c6bb1ee..02896c9a27 100644 --- a/keras_cv/core/factor_sampler/uniform_factor_sampler.py +++ b/keras_cv/core/factor_sampler/uniform_factor_sampler.py @@ -52,3 +52,7 @@ def get_config(self): "upper": self.upper, "seed": self.seed, } + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/keras_cv/layers/preprocessing/augmenter.py b/keras_cv/layers/preprocessing/augmenter.py index 1baf7b78e3..e6448f6fb4 100644 --- a/keras_cv/layers/preprocessing/augmenter.py +++ b/keras_cv/layers/preprocessing/augmenter.py @@ -35,3 +35,9 @@ def get_config(self): config = super().get_config() config.update({"layers": self.layers}) return config + + @classmethod + def from_config(cls, config): + if config["layers"] and isinstance(config["layers"][0], dict): + config["layers"] = tf.keras.utils.deserialize_keras_object(config["layers"]) + return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_augmentation_pipeline.py b/keras_cv/layers/preprocessing/random_augmentation_pipeline.py index 662d1e9e03..cd75ebb303 100644 --- a/keras_cv/layers/preprocessing/random_augmentation_pipeline.py +++ b/keras_cv/layers/preprocessing/random_augmentation_pipeline.py @@ -114,3 +114,12 @@ def get_config(self): } ) return config + + @classmethod + def from_config(cls, config): + layers = config.pop("layers", None) + if layers: + if isinstance(layers[0], dict): + layers = tf.keras.utils.deserialize_keras_object(layers) + config["layers"] = layers + return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_color_degeneration.py b/keras_cv/layers/preprocessing/random_color_degeneration.py index 7932339ab6..47717616d3 100644 --- a/keras_cv/layers/preprocessing/random_color_degeneration.py +++ b/keras_cv/layers/preprocessing/random_color_degeneration.py @@ -75,3 +75,9 @@ def get_config(self): config = super().get_config() config.update({"factor": self.factor, "seed": self.seed}) return config + + @classmethod + def from_config(cls, config): + if isinstance(config["factor"], dict): + config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_crop_and_resize.py b/keras_cv/layers/preprocessing/random_crop_and_resize.py index 9c859cc9b2..0af9e5c692 100644 --- a/keras_cv/layers/preprocessing/random_crop_and_resize.py +++ b/keras_cv/layers/preprocessing/random_crop_and_resize.py @@ -270,6 +270,18 @@ def get_config(self): ) return config + @classmethod + def from_config(cls, config): + if isinstance(config["crop_area_factor"], dict): + config["crop_area_factor"] = tf.keras.utils.deserialize_keras_object( + config["crop_area_factor"] + ) + if isinstance(config["aspect_ratio_factor"], dict): + config["aspect_ratio_factor"] = tf.keras.utils.deserialize_keras_object( + config["aspect_ratio_factor"] + ) + return cls(**config) + def _crop_and_resize(self, image, transformation, method=None): image = tf.expand_dims(image, axis=0) boxes = transformation diff --git a/keras_cv/layers/preprocessing/random_saturation.py b/keras_cv/layers/preprocessing/random_saturation.py index 9cdeb4b8f9..01288a1b8e 100644 --- a/keras_cv/layers/preprocessing/random_saturation.py +++ b/keras_cv/layers/preprocessing/random_saturation.py @@ -87,3 +87,9 @@ def get_config(self): } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if isinstance(config["factor"], dict): + config["factor"] = 
tf.keras.utils.deserialize_keras_object(config["factor"]) + return cls(**config) diff --git a/keras_cv/layers/preprocessing/randomly_zoomed_crop.py b/keras_cv/layers/preprocessing/randomly_zoomed_crop.py index c92f8486f4..b3a034bdef 100644 --- a/keras_cv/layers/preprocessing/randomly_zoomed_crop.py +++ b/keras_cv/layers/preprocessing/randomly_zoomed_crop.py @@ -226,6 +226,18 @@ def get_config(self): ) return config + @classmethod + def from_config(cls, config): + if isinstance(config["zoom_factor"], dict): + config["zoom_factor"] = tf.keras.utils.deserialize_keras_object( + config["zoom_factor"] + ) + if isinstance(config["aspect_ratio_factor"], dict): + config["aspect_ratio_factor"] = tf.keras.utils.deserialize_keras_object( + config["aspect_ratio_factor"] + ) + return cls(**config) + def _crop_and_resize(self, image, transformation, method=None): image = tf.expand_dims(image, axis=0) boxes = transformation diff --git a/keras_cv/layers/preprocessing/repeated_augmentation.py b/keras_cv/layers/preprocessing/repeated_augmentation.py index 25fefc727a..aa7bdc8e76 100644 --- a/keras_cv/layers/preprocessing/repeated_augmentation.py +++ b/keras_cv/layers/preprocessing/repeated_augmentation.py @@ -111,3 +111,11 @@ def get_config(self): config = super().get_config() config.update({"augmenters": self.augmenters, "shuffle": self.shuffle}) return config + + @classmethod + def from_config(cls, config): + if config["augmenters"] and isinstance(config["augmenters"][0], dict): + config["augmenters"] = tf.keras.utils.deserialize_keras_object( + config["augmenters"] + ) + return cls(**config) diff --git a/keras_cv/layers/preprocessing/solarization.py b/keras_cv/layers/preprocessing/solarization.py index bcc9f61fe9..7272574e7e 100644 --- a/keras_cv/layers/preprocessing/solarization.py +++ b/keras_cv/layers/preprocessing/solarization.py @@ -130,3 +130,15 @@ def get_config(self): } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if isinstance(config["threshold_factor"], dict): + config["threshold_factor"] = tf.keras.utils.deserialize_keras_object( + config["threshold_factor"] + ) + if isinstance(config["addition_factor"], dict): + config["addition_factor"] = tf.keras.utils.deserialize_keras_object( + config["addition_factor"] + ) + return cls(**config) diff --git a/keras_cv/layers/regularization/squeeze_excite.py b/keras_cv/layers/regularization/squeeze_excite.py index 5939848a98..261a7c7f91 100644 --- a/keras_cv/layers/regularization/squeeze_excite.py +++ b/keras_cv/layers/regularization/squeeze_excite.py @@ -105,3 +105,15 @@ def get_config(self): } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if isinstance(config["squeeze_activation"], dict): + config["squeeze_activation"] = tf.keras.utils.deserialize_keras_object( + config["squeeze_activation"] + ) + if isinstance(config["excite_activation"], dict): + config["excite_activation"] = tf.keras.utils.deserialize_keras_object( + config["excite_activation"] + ) + return cls(**config) diff --git a/keras_cv/layers/transformer_encoder.py b/keras_cv/layers/transformer_encoder.py index f80575bd73..87465921d1 100644 --- a/keras_cv/layers/transformer_encoder.py +++ b/keras_cv/layers/transformer_encoder.py @@ -113,6 +113,9 @@ def call(self, inputs): def get_config(self): config = super().get_config() + activation = self.activation + if not isinstance(activation, (str, 
dict)): + activation = tf.keras.activations.serialize(activation) config.update( { "project_dim": self.project_dim, @@ -120,7 +123,7 @@ def get_config(self): "num_heads": self.num_heads, "attention_dropout": self.attention_dropout, "mlp_dropout": self.mlp_dropout, - "activation": self.activation, + "activation": activation, "layer_norm_epsilon": self.layer_norm_epsilon, } ) @@ -129,5 +132,6 @@ def get_config(self): @classmethod def from_config(cls, config, custom_objects=None): activation = config.pop("activation") - activation = tf.keras.activations.deserialize(activation) + if isinstance(activation, (str, dict)): + activation = tf.keras.activations.deserialize(activation) return cls(activation=activation, **config) diff --git a/keras_cv/models/csp_darknet_test.py b/keras_cv/models/csp_darknet_test.py index 4d25109483..99942be6e1 100644 --- a/keras_cv/models/csp_darknet_test.py +++ b/keras_cv/models/csp_darknet_test.py @@ -29,6 +29,8 @@ class CSPDarkNetTest(ModelsTest, tf.test.TestCase, parameterized.TestCase): @parameterized.parameters(*MODEL_LIST) def test_application_base(self, app, _, args): + if hasattr(tf.keras.__internal__, "enable_unsafe_deserialization"): + tf.keras.__internal__.enable_unsafe_deserialization() super()._test_application_base(app, _, args) @parameterized.parameters(*MODEL_LIST) diff --git a/keras_cv/utils/preprocessing.py b/keras_cv/utils/preprocessing.py index 5a4b8beffa..5535d0d0d3 100644 --- a/keras_cv/utils/preprocessing.py +++ b/keras_cv/utils/preprocessing.py @@ -126,6 +126,12 @@ def blend(image1: tf.Tensor, image2: tf.Tensor, factor: float) -> tf.Tensor: def parse_factor(param, min_value=0.0, max_value=1.0, param_name="factor", seed=None): + if isinstance(param, dict): + # For all classes missing a `from_config` implementation. + # (RandomHue, RandomShear, etc.) + # To be removed with addition of `keras.__internal__` namespace support + param = tf.keras.utils.deserialize_keras_object(param) + if isinstance(param, core.FactorSampler): return param From f67cac84bdbefc16210c11a2cd6e6bdcb874e0aa Mon Sep 17 00:00:00 2001 From: Ian Stenbit <3072903+ianstenbit@users.noreply.github.com> Date: Fri, 10 Feb 2023 17:06:15 -0500 Subject: [PATCH 18/27] Support non-default dtypes in 3D preprocessing layers (#1388) --- .../frustum_random_point_feature_noise.py | 1 + .../preprocessing_3d/global_random_rotation.py | 15 ++++++++++++--- .../preprocessing_3d/global_random_scaling.py | 15 ++++++++++++--- .../preprocessing_3d/global_random_translation.py | 15 +++++++++------ 4 files changed, 34 insertions(+), 12 deletions(-) diff --git a/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py b/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py index 8fc2d087f3..f40756180a 100644 --- a/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py +++ b/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py @@ -122,6 +122,7 @@ def get_random_transformation(self, point_clouds, **kwargs): ) # Do add feature noise outside the frustum mask. 
random_point_noise = tf.where(~frustum_mask, 1.0, noise) + random_point_noise = tf.cast(random_point_noise, dtype=self.compute_dtype) return {"point_noise": random_point_noise} def augment_point_clouds_bounding_boxes( diff --git a/keras_cv/layers/preprocessing_3d/global_random_rotation.py b/keras_cv/layers/preprocessing_3d/global_random_rotation.py index 5cdd95e1d9..6b952634d2 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_rotation.py +++ b/keras_cv/layers/preprocessing_3d/global_random_rotation.py @@ -83,13 +83,22 @@ def get_config(self): def get_random_transformation(self, **kwargs): random_rotation_x = self._random_generator.random_uniform( - (), minval=-self._max_rotation_angle_x, maxval=self._max_rotation_angle_x + (), + minval=-self._max_rotation_angle_x, + maxval=self._max_rotation_angle_x, + dtype=self.compute_dtype, ) random_rotation_y = self._random_generator.random_uniform( - (), minval=-self._max_rotation_angle_y, maxval=self._max_rotation_angle_y + (), + minval=-self._max_rotation_angle_y, + maxval=self._max_rotation_angle_y, + dtype=self.compute_dtype, ) random_rotation_z = self._random_generator.random_uniform( - (), minval=-self._max_rotation_angle_z, maxval=self._max_rotation_angle_z + (), + minval=-self._max_rotation_angle_z, + maxval=self._max_rotation_angle_z, + dtype=self.compute_dtype, ) return { "pose": tf.stack( diff --git a/keras_cv/layers/preprocessing_3d/global_random_scaling.py b/keras_cv/layers/preprocessing_3d/global_random_scaling.py index bc061392a5..9561e4ee5e 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_scaling.py +++ b/keras_cv/layers/preprocessing_3d/global_random_scaling.py @@ -138,13 +138,22 @@ def get_config(self): def get_random_transformation(self, **kwargs): random_scaling_x = self._random_generator.random_uniform( - (), minval=self._min_x_factor, maxval=self._max_x_factor + (), + minval=self._min_x_factor, + maxval=self._max_x_factor, + dtype=self.compute_dtype, ) random_scaling_y = self._random_generator.random_uniform( - (), minval=self._min_y_factor, maxval=self._max_y_factor + (), + minval=self._min_y_factor, + maxval=self._max_y_factor, + dtype=self.compute_dtype, ) random_scaling_z = self._random_generator.random_uniform( - (), minval=self._min_z_factor, maxval=self._max_z_factor + (), + minval=self._min_z_factor, + maxval=self._max_z_factor, + dtype=self.compute_dtype, ) if not self._preserve_aspect_ratio: return { diff --git a/keras_cv/layers/preprocessing_3d/global_random_translation.py b/keras_cv/layers/preprocessing_3d/global_random_translation.py index 1fd6092721..5509f0a13e 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_translation.py +++ b/keras_cv/layers/preprocessing_3d/global_random_translation.py @@ -71,13 +71,16 @@ def get_config(self): def get_random_transformation(self, **kwargs): random_x_translation = self._random_generator.random_normal( - (), mean=0.0, stddev=self._x_stddev + (), mean=0.0, stddev=self._x_stddev, dtype=self.compute_dtype ) random_y_translation = self._random_generator.random_normal( - (), mean=0.0, stddev=self._y_stddev + (), mean=0.0, stddev=self._y_stddev, dtype=self.compute_dtype ) random_z_translation = self._random_generator.random_normal( - (), mean=0.0, stddev=self._z_stddev + (), + mean=0.0, + stddev=self._z_stddev, + dtype=self.compute_dtype, ) return { "pose": tf.stack( @@ -85,9 +88,9 @@ def get_random_transformation(self, **kwargs): random_x_translation, random_y_translation, random_z_translation, - 0, - 0, - 0, + 0.0, + 0.0, + 0.0, ], axis=0, ) From 
b4513f3fe11fba0298fcb8c27f393c49d7b1f993 Mon Sep 17 00:00:00 2001 From: HongYu <20734616+james77777778@users.noreply.github.com> Date: Thu, 16 Feb 2023 12:42:43 +0800 Subject: [PATCH 19/27] Vectorized random saturation (#1392) * Vectorized random saturation * Fix tests - use ellipsis to prevent dimension error in adjust_factors - rename s_channel_batch to s_channel - fix not implement error for augment_bounding_boxes and augment_labels - remove serialization registration in OldRandomSaturation * Fix with_mixed_precision_test * Remove serialization registration in benchmark --- benchmarks/vectorized_random_saturation.py | 174 ++++++++++++++++++ .../layers/preprocessing/random_saturation.py | 54 ++++-- .../preprocessing/random_saturation_test.py | 107 ++++++++++- 3 files changed, 318 insertions(+), 17 deletions(-) create mode 100644 benchmarks/vectorized_random_saturation.py diff --git a/benchmarks/vectorized_random_saturation.py b/benchmarks/vectorized_random_saturation.py new file mode 100644 index 0000000000..426d4f8382 --- /dev/null +++ b/benchmarks/vectorized_random_saturation.py @@ -0,0 +1,174 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf +from tensorflow import keras + +from keras_cv.layers import RandomSaturation +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) +from keras_cv.utils import preprocessing as preprocessing_utils + + +class OldRandomSaturation(BaseImageAugmentationLayer): + """Randomly adjusts the saturation on given images. + + This layer will randomly increase/reduce the saturation for the input RGB + images. At inference time, the output will be identical to the input. + Call the layer with `training=True` to adjust the saturation of the input. + + Args: + factor: A tuple of two floats, a single float or `keras_cv.FactorSampler`. + `factor` controls the extent to which the image saturation is impacted. + `factor=0.5` makes this layer perform a no-op operation. `factor=0.0` makes + the image to be fully grayscale. `factor=1.0` makes the image to be fully + saturated. + Values should be between `0.0` and `1.0`. If a tuple is used, a `factor` + is sampled between the two values for every image augmented. If a single + float is used, a value between `0.0` and the passed float is sampled. + In order to ensure the value is always the same, please pass a tuple with + two identical floats: `(0.5, 0.5)`. + seed: Integer. Used to create a random seed. + """ + + def __init__(self, factor, seed=None, **kwargs): + super().__init__(seed=seed, **kwargs) + self.factor = preprocessing_utils.parse_factor( + factor, + min_value=0.0, + max_value=1.0, + ) + self.seed = seed + + def get_random_transformation(self, **kwargs): + return self.factor() + + def augment_image(self, image, transformation=None, **kwargs): + # Convert the factor range from [0, 1] to [0, +inf]. 
Note that the + # tf.image.adjust_saturation is trying to apply the following math formula + # `output_saturation = input_saturation * factor`. We use the following + # method to the do the mapping. + # `y = x / (1 - x)`. + # This will ensure: + # y = +inf when x = 1 (full saturation) + # y = 1 when x = 0.5 (no augmentation) + # y = 0 when x = 0 (full gray scale) + + # Convert the transformation to tensor in case it is a float. When + # transformation is 1.0, then it will result in to divide by zero error, but + # it will be handled correctly when it is a one tensor. + transformation = tf.convert_to_tensor(transformation) + adjust_factor = transformation / (1 - transformation) + return tf.image.adjust_saturation(image, saturation_factor=adjust_factor) + + def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + return bounding_boxes + + def augment_label(self, label, transformation=None, **kwargs): + return label + + def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + return segmentation_mask + + def get_config(self): + config = { + "factor": self.factor, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if isinstance(config["factor"], dict): + config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + return cls(**config) + + +(x_train, _), _ = keras.datasets.cifar10.load_data() +x_train = x_train.astype(np.float32) +x_train.shape + +num_images = [1000, 2000, 5000, 10000] +results = {} +aug_candidates = [RandomSaturation, OldRandomSaturation] +aug_args = {"factor": (0.5)} + +for aug in aug_candidates: + c = aug.__name__ + + layer = aug(**aug_args) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + + results[c] = runtimes + + c = aug.__name__ + " Graph Mode" + + layer = aug(**aug_args) + + @tf.function() + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + + results[c] = runtimes + +plt.figure() +for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + +plt.ylabel("Runtime (seconds)") +plt.legend() +plt.savefig("comparison.png") + +# So we can actually see more relevant margins +del results[aug_candidates[1].__name__] + +plt.figure() +for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + +plt.ylabel("Runtime (seconds)") +plt.legend() +plt.savefig("comparison_no_old_eager.png") diff --git a/keras_cv/layers/preprocessing/random_saturation.py b/keras_cv/layers/preprocessing/random_saturation.py index 01288a1b8e..a12d27cae1 100644 --- a/keras_cv/layers/preprocessing/random_saturation.py +++ b/keras_cv/layers/preprocessing/random_saturation.py @@ -1,4 +1,4 @@ -# Copyright 2022 The KerasCV Authors +# Copyright 2023 The KerasCV Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
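# --- Illustrative sketch, not part of this patch ---
# The comments above map the sampled saturation factor in [0, 1] onto the
# multiplier range [0, +inf) expected by tf.image.adjust_saturation via
# y = x / (1 - x). A few spot checks with a hypothetical helper (plain Python;
# the name is chosen here for illustration only):
def saturation_multiplier(factor):
    # factor == 0.5 -> 1.0 (no-op), factor == 0.0 -> 0.0 (fully grayscale),
    # and the multiplier grows without bound as factor approaches 1.0.
    return factor / (1.0 - factor)

assert saturation_multiplier(0.5) == 1.0
assert saturation_multiplier(0.0) == 0.0
assert saturation_multiplier(0.75) == 3.0
# --- End of sketch ---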
@@ -11,16 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import tensorflow as tf -from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( - BaseImageAugmentationLayer, +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, ) -from keras_cv.utils import preprocessing +from keras_cv.utils import preprocessing as preprocessing_utils @tf.keras.utils.register_keras_serializable(package="keras_cv") -class RandomSaturation(BaseImageAugmentationLayer): +class RandomSaturation(VectorizedBaseImageAugmentationLayer): """Randomly adjusts the saturation on given images. This layer will randomly increase/reduce the saturation for the input RGB @@ -39,21 +40,33 @@ class RandomSaturation(BaseImageAugmentationLayer): In order to ensure the value is always the same, please pass a tuple with two identical floats: `(0.5, 0.5)`. seed: Integer. Used to create a random seed. + + Usage: + ```python + (images, labels), _ = tf.keras.datasets.cifar10.load_data() + random_saturation = keras_cv.layers.preprocessing.RandomSaturation() + augmented_images = random_saturation(images) + ``` """ def __init__(self, factor, seed=None, **kwargs): super().__init__(seed=seed, **kwargs) - self.factor = preprocessing.parse_factor( + self.factor = preprocessing_utils.parse_factor( factor, min_value=0.0, max_value=1.0, ) self.seed = seed - def get_random_transformation(self, **kwargs): - return self.factor() + def get_random_transformation_batch(self, batch_size, **kwargs): + return self.factor(shape=(batch_size,)) + + def augment_ragged_image(self, image, transformation, **kwargs): + return self.augment_images( + images=image, transformations=transformation, **kwargs + ) - def augment_image(self, image, transformation=None, **kwargs): + def augment_images(self, images, transformations, **kwargs): # Convert the factor range from [0, 1] to [0, +inf]. Note that the # tf.image.adjust_saturation is trying to apply the following math formula # `output_saturation = input_saturation * factor`. We use the following @@ -67,18 +80,27 @@ def augment_image(self, image, transformation=None, **kwargs): # Convert the transformation to tensor in case it is a float. When # transformation is 1.0, then it will result in to divide by zero error, but # it will be handled correctly when it is a one tensor. 
- transformation = tf.convert_to_tensor(transformation) - adjust_factor = transformation / (1 - transformation) - return tf.image.adjust_saturation(image, saturation_factor=adjust_factor) + transformations = tf.convert_to_tensor(transformations) + adjust_factors = transformations / (1 - transformations) + adjust_factors = tf.cast(adjust_factors, dtype=images.dtype) + + images = tf.image.rgb_to_hsv(images) + s_channel = tf.multiply( + images[..., 1], adjust_factors[..., tf.newaxis, tf.newaxis] + ) + s_channel = tf.clip_by_value(s_channel, clip_value_min=0.0, clip_value_max=1.0) + images = tf.stack([images[..., 0], s_channel, images[..., 2]], axis=-1) + images = tf.image.hsv_to_rgb(images) + return images def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): return bounding_boxes - def augment_label(self, label, transformation=None, **kwargs): - return label + def augment_labels(self, labels, transformations=None, **kwargs): + return labels - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): - return segmentation_mask + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + return segmentation_masks def get_config(self): config = { diff --git a/keras_cv/layers/preprocessing/random_saturation_test.py b/keras_cv/layers/preprocessing/random_saturation_test.py index 0a56374af3..c50e9c470d 100644 --- a/keras_cv/layers/preprocessing/random_saturation_test.py +++ b/keras_cv/layers/preprocessing/random_saturation_test.py @@ -1,4 +1,4 @@ -# Copyright 2022 The KerasCV Authors +# Copyright 2023 The KerasCV Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +15,85 @@ from keras_cv import core from keras_cv.layers import preprocessing +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) +from keras_cv.utils import preprocessing as preprocessing_utils + + +class OldRandomSaturation(BaseImageAugmentationLayer): + """Randomly adjusts the saturation on given images. + + This layer will randomly increase/reduce the saturation for the input RGB + images. At inference time, the output will be identical to the input. + Call the layer with `training=True` to adjust the saturation of the input. + + Args: + factor: A tuple of two floats, a single float or `keras_cv.FactorSampler`. + `factor` controls the extent to which the image saturation is impacted. + `factor=0.5` makes this layer perform a no-op operation. `factor=0.0` makes + the image to be fully grayscale. `factor=1.0` makes the image to be fully + saturated. + Values should be between `0.0` and `1.0`. If a tuple is used, a `factor` + is sampled between the two values for every image augmented. If a single + float is used, a value between `0.0` and the passed float is sampled. + In order to ensure the value is always the same, please pass a tuple with + two identical floats: `(0.5, 0.5)`. + seed: Integer. Used to create a random seed. + """ + + def __init__(self, factor, seed=None, **kwargs): + super().__init__(seed=seed, **kwargs) + self.factor = preprocessing_utils.parse_factor( + factor, + min_value=0.0, + max_value=1.0, + ) + self.seed = seed + + def get_random_transformation(self, **kwargs): + return self.factor() + + def augment_image(self, image, transformation=None, **kwargs): + # Convert the factor range from [0, 1] to [0, +inf]. 
Note that the + # tf.image.adjust_saturation is trying to apply the following math formula + # `output_saturation = input_saturation * factor`. We use the following + # method to the do the mapping. + # `y = x / (1 - x)`. + # This will ensure: + # y = +inf when x = 1 (full saturation) + # y = 1 when x = 0.5 (no augmentation) + # y = 0 when x = 0 (full gray scale) + + # Convert the transformation to tensor in case it is a float. When + # transformation is 1.0, then it will result in to divide by zero error, but + # it will be handled correctly when it is a one tensor. + transformation = tf.convert_to_tensor(transformation) + adjust_factor = transformation / (1 - transformation) + return tf.image.adjust_saturation(image, saturation_factor=adjust_factor) + + def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + return bounding_boxes + + def augment_label(self, label, transformation=None, **kwargs): + return label + + def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + return segmentation_mask + + def get_config(self): + config = { + "factor": self.factor, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if isinstance(config["factor"], dict): + config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + return cls(**config) class RandomSaturationTest(tf.test.TestCase): @@ -93,3 +172,29 @@ def test_config(self): self.assertTrue(isinstance(config["factor"], core.UniformFactorSampler)) self.assertEqual(config["factor"].get_config()["lower"], 0.3) self.assertEqual(config["factor"].get_config()["upper"], 0.8) + + def test_correctness_with_tf_adjust_saturation_normalized_range(self): + image_shape = (16, 32, 32, 3) + fixed_factor = (0.8, 0.8) + image = tf.random.uniform(shape=image_shape) + + layer = preprocessing.RandomSaturation(factor=fixed_factor) + old_layer = OldRandomSaturation(factor=fixed_factor) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output, atol=1e-5, rtol=1e-5) + + def test_correctness_with_tf_adjust_saturation_rgb_range(self): + image_shape = (16, 32, 32, 3) + fixed_factor = (0.8, 0.8) + image = tf.random.uniform(shape=image_shape) * 255.0 + + layer = preprocessing.RandomSaturation(factor=fixed_factor) + old_layer = OldRandomSaturation(factor=fixed_factor) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output, atol=1e-3, rtol=1e-5) From 1411bf2948195d7efa0ffa73b9f1f2a85caeae46 Mon Sep 17 00:00:00 2001 From: HongYu <20734616+james77777778@users.noreply.github.com> Date: Fri, 17 Feb 2023 05:52:07 +0800 Subject: [PATCH 20/27] Add vectorized RandomBrightness, RandomContrast, RandomHue and RandomColorJitter (#1406) * vectorize RandomBrightness * vectorize RandomContrast * vectorize RandomHue * vectorize RandomColorJitter --- benchmarks/vectorized_random_brightness.py | 233 ++++++++++++++++ benchmarks/vectorized_random_color_jitter.py | 257 ++++++++++++++++++ benchmarks/vectorized_random_contrast.py | 223 +++++++++++++++ benchmarks/vectorized_random_hue.py | 206 ++++++++++++++ .../layers/preprocessing/random_brightness.py | 78 +++--- .../preprocessing/random_color_jitter.py | 49 ++-- .../layers/preprocessing/random_contrast.py | 59 ++-- keras_cv/layers/preprocessing/random_hue.py | 72 +++-- .../layers/preprocessing/random_hue_test.py | 8 +- 9 files changed, 1090 insertions(+), 95 
deletions(-) create mode 100644 benchmarks/vectorized_random_brightness.py create mode 100644 benchmarks/vectorized_random_color_jitter.py create mode 100644 benchmarks/vectorized_random_contrast.py create mode 100644 benchmarks/vectorized_random_hue.py diff --git a/benchmarks/vectorized_random_brightness.py b/benchmarks/vectorized_random_brightness.py new file mode 100644 index 0000000000..823b6fdfc9 --- /dev/null +++ b/benchmarks/vectorized_random_brightness.py @@ -0,0 +1,233 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +from keras_cv.layers import RandomBrightness +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) +from keras_cv.utils import preprocessing as preprocessing_utils + + +class OldRandomBrightness(BaseImageAugmentationLayer): + """A preprocessing layer which randomly adjusts brightness during training. + This layer will randomly increase/reduce the brightness for the input RGB + images. + + At inference time, the output will be identical to the input. + Call the layer with `training=True` to adjust the brightness of the input. + + Note that different brightness adjustment factors + will be apply to each the images in the batch. + + Args: + factor: Float or a list/tuple of 2 floats between -1.0 and 1.0. The + factor is used to determine the lower bound and upper bound of the + brightness adjustment. A float value will be chosen randomly between + the limits. When -1.0 is chosen, the output image will be black, and + when 1.0 is chosen, the image will be fully white. When only one float + is provided, eg, 0.2, then -0.2 will be used for lower bound and 0.2 + will be used for upper bound. + value_range: Optional list/tuple of 2 floats for the lower and upper limit + of the values of the input data. Defaults to [0.0, 255.0]. Can be + changed to e.g. [0.0, 1.0] if the image input has been scaled before + this layer. The brightness adjustment will be scaled to this range, and + the output values will be clipped to this range. + seed: optional integer, for fixed RNG behavior. + Inputs: 3D (HWC) or 4D (NHWC) tensor, with float or int dtype. Input pixel + values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) + Output: 3D (HWC) or 4D (NHWC) tensor with brightness adjusted based on the + `factor`. By default, the layer will output floats. The output value will + be clipped to the range `[0, 255]`, the valid range of RGB colors, and + rescaled based on the `value_range` if needed. 
+ ``` + """ + + def __init__(self, factor, value_range=(0, 255), seed=None, **kwargs): + super().__init__(seed=seed, force_generator=True, **kwargs) + if isinstance(factor, float) or isinstance(factor, int): + factor = (-factor, factor) + self.factor = preprocessing_utils.parse_factor( + factor, min_value=-1, max_value=1 + ) + self.value_range = value_range + self.seed = seed + + def augment_image(self, image, transformation, **kwargs): + return self._brightness_adjust(image, transformation) + + def augment_label(self, label, transformation, **kwargs): + return label + + def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + return segmentation_mask + + def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + return bounding_boxes + + def get_random_transformation(self, **kwargs): + rgb_delta_shape = (1, 1, 1) + random_rgb_delta = self.factor(shape=rgb_delta_shape) + random_rgb_delta = random_rgb_delta * ( + self.value_range[1] - self.value_range[0] + ) + return random_rgb_delta + + def _brightness_adjust(self, image, rgb_delta): + rank = image.shape.rank + if rank != 3: + raise ValueError( + "Expected the input image to be rank 3. Got " + f"inputs.shape = {image.shape}" + ) + rgb_delta = tf.cast(rgb_delta, image.dtype) + image += rgb_delta + return tf.clip_by_value(image, self.value_range[0], self.value_range[1]) + + def get_config(self): + config = { + "factor": self.factor, + "value_range": self.value_range, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class RandomBrightnessTest(tf.test.TestCase): + def test_consistency_with_old_impl_rescaled_range(self): + image_shape = (16, 32, 32, 3) + fixed_factor = (0.8, 0.8) + image = tf.random.uniform(shape=image_shape) + + layer = RandomBrightness(factor=fixed_factor) + old_layer = OldRandomBrightness(factor=fixed_factor) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output) + + def test_consistency_with_old_impl_rgb_range(self): + image_shape = (16, 32, 32, 3) + fixed_factor = (0.8, 0.8) + image = tf.random.uniform(shape=image_shape) * 255.0 + + layer = RandomBrightness(factor=fixed_factor) + old_layer = OldRandomBrightness(factor=fixed_factor) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output) + + +if __name__ == "__main__": + # Run benchmark + (x_train, _), _ = tf.keras.datasets.cifar10.load_data() + x_train = x_train.astype(np.float32) + + num_images = [1000, 2000, 5000, 10000] + results = {} + aug_candidates = [RandomBrightness, OldRandomBrightness] + aug_args = {"factor": (0.5)} + + for aug in aug_candidates: + # Eager Mode + c = aug.__name__ + layer = aug(**aug_args) + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # Graph Mode + c = aug.__name__ + " Graph Mode" + layer = aug(**aug_args) + + @tf.function() + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # XLA 
Mode + c = aug.__name__ + " XLA Mode" + layer = aug(**aug_args) + + @tf.function(jit_compile=True) + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison.png") + + # So we can actually see more relevant margins + del results[aug_candidates[1].__name__] + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison_no_old_eager.png") + + # Run unit tests + tf.test.main() diff --git a/benchmarks/vectorized_random_color_jitter.py b/benchmarks/vectorized_random_color_jitter.py new file mode 100644 index 0000000000..963d4dfb8e --- /dev/null +++ b/benchmarks/vectorized_random_color_jitter.py @@ -0,0 +1,257 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +from keras_cv.layers import preprocessing +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) +from keras_cv.utils import preprocessing as preprocessing_utils + + +class OldRandomColorJitter(BaseImageAugmentationLayer): + """RandomColorJitter class randomly apply brightness, contrast, saturation + and hue image processing operation sequentially and randomly on the + input. It expects input as RGB image. The expected image should be + `(0-255)` pixel ranges. + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `channels_last` format + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `channels_last` format + + Args: + value_range: the range of values the incoming images will have. + Represented as a two number tuple written [low, high]. + This is typically either `[0, 1]` or `[0, 255]` depending + on how your preprocessing pipeline is setup. + brightness_factor: Float or a list/tuple of 2 floats between -1.0 + and 1.0. The factor is used to determine the lower bound and + upper bound of the brightness adjustment. A float value will be + chosen randomly between the limits. When -1.0 is chosen, the + output image will be black, and when 1.0 is chosen, the image + will be fully white. When only one float is provided, eg, 0.2, + then -0.2 will be used for lower bound and 0.2 will be used for + upper bound. + contrast_factor: A positive float represented as fraction of value, + or a tuple of size 2 representing lower and upper bound. 
When + represented as a single float, lower = upper. The contrast factor + will be randomly picked between `[1.0 - lower, 1.0 + upper]`. + saturation_factor: Either a tuple of two floats or a single float. + `factor` controls the extent to which the image saturation is + impacted. `factor=0.5` makes this layer perform a no-op operation. + `factor=0.0` makes the image to be fully grayscale. `factor=1.0` + makes the image to be fully saturated. + hue_factor: A tuple of two floats, a single float or + `keras_cv.FactorSampler`. `factor` controls the extent to which the + image sharpness is impacted. `factor=0.0` makes this layer perform + a no-op operation, while a value of 1.0 performs the most aggressive + contrast adjustment available. If a tuple is used, a `factor` is sampled + between the two values for every image augmented. If a single float + is used, a value between `0.0` and the passed float is sampled. + In order to ensure the value is always the same, please pass a tuple + with two identical floats: `(0.5, 0.5)`. + seed: Integer. Used to create a random seed. + + Usage: + ```python + (images, labels), _ = tf.keras.datasets.cifar10.load_data() + color_jitter = keras_cv.layers.RandomColorJitter( + value_range=(0, 255), + brightness_factor=(-0.2, 0.5), + contrast_factor=(0.5, 0.9), + saturation_factor=(0.5, 0.9), + hue_factor=(0.5, 0.9), + ) + augmented_images = color_jitter(images) + ``` + """ + + def __init__( + self, + value_range, + brightness_factor, + contrast_factor, + saturation_factor, + hue_factor, + seed=None, + **kwargs, + ): + super().__init__(**kwargs) + self.value_range = value_range + self.brightness_factor = brightness_factor + self.contrast_factor = contrast_factor + self.saturation_factor = saturation_factor + self.hue_factor = hue_factor + self.seed = seed + + self.random_brightness = preprocessing.RandomBrightness( + factor=self.brightness_factor, value_range=(0, 255), seed=self.seed + ) + self.random_contrast = preprocessing.RandomContrast( + factor=self.contrast_factor, seed=self.seed + ) + self.random_saturation = preprocessing.RandomSaturation( + factor=self.saturation_factor, seed=self.seed + ) + self.random_hue = preprocessing.RandomHue( + factor=self.hue_factor, value_range=(0, 255), seed=self.seed + ) + + def augment_image(self, image, transformation=None, **kwargs): + image = preprocessing_utils.transform_value_range( + image, + original_range=self.value_range, + target_range=(0, 255), + dtype=self.compute_dtype, + ) + image = self.random_brightness(image) + image = self.random_contrast(image) + image = self.random_saturation(image) + image = self.random_hue(image) + image = preprocessing_utils.transform_value_range( + image, + original_range=(0, 255), + target_range=self.value_range, + dtype=self.compute_dtype, + ) + return image + + def augment_bounding_boxes(self, bounding_boxes, **kwargs): + return bounding_boxes + + def augment_label(self, label, transformation=None, **kwargs): + return label + + def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + return segmentation_mask + + def get_config(self): + config = super().get_config() + config.update( + { + "value_range": self.value_range, + "brightness_factor": self.brightness_factor, + "contrast_factor": self.contrast_factor, + "saturation_factor": self.saturation_factor, + "hue_factor": self.hue_factor, + "seed": self.seed, + } + ) + return config + + +if __name__ == "__main__": + # Run benchmark + (x_train, _), _ = tf.keras.datasets.cifar10.load_data() + x_train = 
x_train.astype(np.float32) + + num_images = [1000, 2000, 5000, 10000] + results = {} + aug_candidates = [preprocessing.RandomColorJitter, OldRandomColorJitter] + aug_args = { + "value_range": (0, 255), + "brightness_factor": (-0.2, 0.5), + "contrast_factor": (0.5, 0.9), + "saturation_factor": (0.5, 0.9), + "hue_factor": (0.5, 0.9), + } + + for aug in aug_candidates: + # Eager Mode + c = aug.__name__ + layer = aug(**aug_args) + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # Graph Mode + c = aug.__name__ + " Graph Mode" + layer = aug(**aug_args) + + @tf.function() + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # XLA Mode + c = aug.__name__ + " XLA Mode" + layer = aug(**aug_args) + + @tf.function(jit_compile=True) + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison.png") + + # So we can actually see more relevant margins + del results[aug_candidates[1].__name__] + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison_no_old_eager.png") diff --git a/benchmarks/vectorized_random_contrast.py b/benchmarks/vectorized_random_contrast.py new file mode 100644 index 0000000000..a568b03484 --- /dev/null +++ b/benchmarks/vectorized_random_contrast.py @@ -0,0 +1,223 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +from keras_cv.layers import RandomContrast +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) +from keras_cv.utils import preprocessing as preprocessing_utils + + +class OldRandomContrast(BaseImageAugmentationLayer): + """RandomContrast randomly adjusts contrast during training. + + This layer will randomly adjust the contrast of an image or images by a + random factor. 
Contrast is adjusted independently for each channel of each + image during training. + + For each channel, this layer computes the mean of the image pixels in the + channel and then adjusts each component `x` of each pixel to + `(x - mean) * contrast_factor + mean`. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + in integer or floating point dtype. By default, the layer will output + floats. The output value will be clipped to the range `[0, 255]`, the valid + range of RGB colors. + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Args: + factor: a positive float represented as fraction of value, or a tuple of + size 2 representing lower and upper bound. When represented as a single + float, lower = upper. The contrast factor will be randomly picked + between `[1.0 - lower, 1.0 + upper]`. For any pixel x in the channel, + the output will be `(x - mean) * factor + mean` where `mean` is the mean + value of the channel. + seed: Integer. Used to create a random seed. + """ + + def __init__(self, factor, seed=None, **kwargs): + super().__init__(seed=seed, force_generator=True, **kwargs) + if isinstance(factor, (tuple, list)): + min = 1 - factor[0] + max = 1 + factor[1] + else: + min = 1 - factor + max = 1 + factor + self.factor_input = factor + self.factor = preprocessing_utils.parse_factor( + (min, max), min_value=-1, max_value=2 + ) + self.seed = seed + + def get_random_transformation(self, **kwargs): + return self.factor() + + def augment_image(self, image, transformation, **kwargs): + contrast_factor = transformation + output = tf.image.adjust_contrast(image, contrast_factor=contrast_factor) + output = tf.clip_by_value(output, 0, 255) + output.set_shape(image.shape) + return output + + def augment_label(self, label, transformation, **kwargs): + return label + + def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + return segmentation_mask + + def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + return bounding_boxes + + def get_config(self): + config = { + "factor": self.factor_input, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class RandomContrastTest(tf.test.TestCase): + def test_consistency_with_old_impl_rescaled_range(self): + image_shape = (16, 32, 32, 3) + fixed_factor = (0.3, -0.3) # makes lower and upper the same + image = tf.random.uniform(shape=image_shape) + + layer = RandomContrast(factor=fixed_factor) + old_layer = OldRandomContrast(factor=fixed_factor) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output) + + def test_consistency_with_old_impl_rgb_range(self): + image_shape = (16, 32, 32, 3) + fixed_factor = (0.3, -0.3) # makes lower and upper the same + image = tf.random.uniform(shape=image_shape) * 255.0 + + layer = RandomContrast(factor=fixed_factor) + old_layer = OldRandomContrast(factor=fixed_factor) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output, atol=1e-5, rtol=1e-5) + + +if __name__ == "__main__": + # Run benchmark + (x_train, _), _ = tf.keras.datasets.cifar10.load_data() + x_train = x_train.astype(np.float32) + + num_images = [1000, 2000, 5000, 10000] + results = {} + 
aug_candidates = [RandomContrast, OldRandomContrast] + aug_args = {"factor": (0.5)} + + for aug in aug_candidates: + # Eager Mode + c = aug.__name__ + layer = aug(**aug_args) + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # Graph Mode + c = aug.__name__ + " Graph Mode" + layer = aug(**aug_args) + + @tf.function() + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # XLA Mode + c = aug.__name__ + " XLA Mode" + layer = aug(**aug_args) + + @tf.function(jit_compile=True) + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison.png") + + # So we can actually see more relevant margins + del results[aug_candidates[1].__name__] + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison_no_old_eager.png") + + # Run unit tests + tf.test.main() diff --git a/benchmarks/vectorized_random_hue.py b/benchmarks/vectorized_random_hue.py new file mode 100644 index 0000000000..68132e7c94 --- /dev/null +++ b/benchmarks/vectorized_random_hue.py @@ -0,0 +1,206 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf + +from keras_cv.layers import RandomHue +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) +from keras_cv.utils import preprocessing as preprocessing_utils + + +class OldRandomHue(BaseImageAugmentationLayer): + """Randomly adjusts the hue on given images. + + This layer will randomly increase/reduce the hue for the input RGB + images. At inference time, the output will be identical to the input. + Call the layer with `training=True` to adjust the brightness of the input. + + The image hue is adjusted by converting the image(s) to HSV and rotating the + hue channel (H) by delta. The image is then converted back to RGB. 
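# --- Illustrative sketch, not part of this patch ---
# The docstring above describes hue adjustment as a rotation of the H channel
# in HSV space. tf.image.adjust_hue takes a delta in [-0.5, 0.5], i.e. a
# rotation of up to +/-180 degrees, which is why the layer scales its sampled
# factor into that range. A minimal, hypothetical round trip assuming float
# RGB images in [0, 1]:
import tensorflow as tf

images = tf.random.uniform((2, 8, 8, 3))            # batch of RGB images in [0, 1]
rotated = tf.image.adjust_hue(images, delta=0.25)   # rotate hue by +90 degrees
restored = tf.image.adjust_hue(rotated, delta=-0.25)
# Up to floating point error, rotating back by the opposite delta recovers
# the original images.
# --- End of sketch ---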
+ + Args: + factor: A tuple of two floats, a single float or `keras_cv.FactorSampler`. + `factor` controls the extent to which the image hue is impacted. + `factor=0.0` makes this layer perform a no-op operation, while a value of + 1.0 performs the most aggressive contrast adjustment available. If a tuple + is used, a `factor` is sampled between the two values for every image + augmented. If a single float is used, a value between `0.0` and the passed + float is sampled. In order to ensure the value is always the same, please + pass a tuple with two identical floats: `(0.5, 0.5)`. + value_range: the range of values the incoming images will have. + Represented as a two number tuple written [low, high]. + This is typically either `[0, 1]` or `[0, 255]` depending + on how your preprocessing pipeline is setup. + seed: Integer. Used to create a random seed. + + """ + + def __init__(self, factor, value_range, seed=None, **kwargs): + super().__init__(seed=seed, **kwargs) + self.factor = preprocessing_utils.parse_factor( + factor, + ) + self.value_range = value_range + self.seed = seed + + def get_random_transformation(self, **kwargs): + invert = preprocessing_utils.random_inversion(self._random_generator) + # We must scale self.factor() to the range [-0.5, 0.5]. This is because the + # tf.image operation performs rotation on the hue saturation value orientation. + # This can be thought of as an angle in the range [-180, 180] + return invert * self.factor() * 0.5 + + def augment_image(self, image, transformation=None, **kwargs): + image = preprocessing_utils.transform_value_range( + image, self.value_range, (0, 1), dtype=self.compute_dtype + ) + + # tf.image.adjust_hue expects floats to be in range [0, 1] + image = tf.image.adjust_hue(image, delta=transformation) + # RandomHue is one of the rare KPLs that needs to clip + image = tf.clip_by_value(image, 0, 1) + image = preprocessing_utils.transform_value_range( + image, (0, 1), self.value_range, dtype=self.compute_dtype + ) + return image + + def augment_bounding_boxes(self, bounding_boxes, **kwargs): + return bounding_boxes + + def augment_label(self, label, transformation=None, **kwargs): + return label + + def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + return segmentation_mask + + def get_config(self): + config = { + "factor": self.factor, + "value_range": self.value_range, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + +class RandomHueTest(tf.test.TestCase): + def test_consistency_with_old_impl_rescaled_range(self): + image_shape = (16, 32, 32, 3) + fixed_factor = (0.8, 0.8) + fixed_seed = 2023 + image = tf.random.uniform(shape=image_shape) + + layer = RandomHue(fixed_factor, (0, 1), fixed_seed) + old_layer = OldRandomHue(fixed_factor, (0, 1), fixed_seed) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output) + + def test_consistency_with_old_impl_rgb_range(self): + image_shape = (16, 32, 32, 3) + fixed_factor = (0.8, 0.8) + fixed_seed = 2023 + image = tf.random.uniform(shape=image_shape) * 255.0 + + layer = RandomHue(fixed_factor, (0, 255), fixed_seed) + old_layer = OldRandomHue(fixed_factor, (0, 255), fixed_seed) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output, atol=1e-3, rtol=1e-5) + + +if __name__ == "__main__": + # Run benchmark + (x_train, _), _ = tf.keras.datasets.cifar10.load_data() + x_train = x_train.astype(np.float32) 
+ + num_images = [1000, 2000, 5000, 10000] + results = {} + aug_candidates = [RandomHue, OldRandomHue] + aug_args = {"factor": (0.5), "value_range": (0, 255)} + + for aug in aug_candidates: + # Eager Mode + c = aug.__name__ + layer = aug(**aug_args) + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # Graph Mode + c = aug.__name__ + " Graph Mode" + layer = aug(**aug_args) + + @tf.function() + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # XLA Mode + # OldRandomHue fails to run jit_compile=True + + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison.png") + + # So we can actually see more relevant margins + del results[aug_candidates[1].__name__] + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison_no_old_eager.png") + + # Run unit tests + tf.test.main() diff --git a/keras_cv/layers/preprocessing/random_brightness.py b/keras_cv/layers/preprocessing/random_brightness.py index 790719540d..d8919c6cea 100644 --- a/keras_cv/layers/preprocessing/random_brightness.py +++ b/keras_cv/layers/preprocessing/random_brightness.py @@ -1,4 +1,4 @@ -# Copyright 2022 The KerasCV Authors +# Copyright 2023 The KerasCV Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,14 +14,14 @@ import tensorflow as tf -from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( - BaseImageAugmentationLayer, +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, ) -from keras_cv.utils import preprocessing +from keras_cv.utils import preprocessing as preprocessing_utils @tf.keras.utils.register_keras_serializable(package="keras_cv") -class RandomBrightness(BaseImageAugmentationLayer): +class RandomBrightness(VectorizedBaseImageAugmentationLayer): """A preprocessing layer which randomly adjusts brightness during training. This layer will randomly increase/reduce the brightness for the input RGB images. @@ -52,6 +52,12 @@ class RandomBrightness(BaseImageAugmentationLayer): `factor`. By default, the layer will output floats. The output value will be clipped to the range `[0, 255]`, the valid range of RGB colors, and rescaled based on the `value_range` if needed. 
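# --- Illustrative sketch, not part of this patch ---
# The docstring above describes the brightness adjustment: one factor is
# sampled per image in [-1, 1], scaled by the width of `value_range`, added to
# every pixel, and the result is clipped back into `value_range`. A minimal,
# hypothetical rendition of that math (the function name is chosen here for
# illustration only), assuming a float batch in [0, 255]:
import tensorflow as tf

def adjust_brightness(images, factors, value_range=(0.0, 255.0)):
    # factors: shape (batch,); broadcast to (batch, 1, 1, 1) before adding.
    deltas = factors * (value_range[1] - value_range[0])
    images = images + deltas[:, None, None, None]
    return tf.clip_by_value(images, value_range[0], value_range[1])

images = tf.random.uniform((4, 8, 8, 3), maxval=255.0)
factors = tf.constant([-0.2, 0.0, 0.1, 0.5])
brightened = adjust_brightness(images, factors)
# --- End of sketch ---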
+ + Usage: + ```python + (images, labels), _ = tf.keras.datasets.cifar10.load_data() + random_brightness = keras_cv.layers.preprocessing.RandomBrightness() + augmented_images = random_brightness(images) ``` """ @@ -59,40 +65,44 @@ def __init__(self, factor, value_range=(0, 255), seed=None, **kwargs): super().__init__(seed=seed, force_generator=True, **kwargs) if isinstance(factor, float) or isinstance(factor, int): factor = (-factor, factor) - self.factor = preprocessing.parse_factor(factor, min_value=-1, max_value=1) + self.factor = preprocessing_utils.parse_factor( + factor, min_value=-1, max_value=1 + ) self.value_range = value_range self.seed = seed - def augment_image(self, image, transformation, **kwargs): - return self._brightness_adjust(image, transformation) - - def augment_label(self, label, transformation, **kwargs): - return label - - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): - return segmentation_mask - - def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): - return bounding_boxes - - def get_random_transformation(self, **kwargs): - rgb_delta_shape = (1, 1, 1) - random_rgb_delta = self.factor(shape=rgb_delta_shape) - random_rgb_delta = random_rgb_delta * ( + def get_random_transformation_batch(self, batch_size, **kwargs): + rgb_delta_shape = (batch_size, 1, 1, 1) + random_rgb_deltas = self.factor(shape=rgb_delta_shape) + random_rgb_deltas = random_rgb_deltas * ( self.value_range[1] - self.value_range[0] ) - return random_rgb_delta + return random_rgb_deltas - def _brightness_adjust(self, image, rgb_delta): - rank = image.shape.rank - if rank != 3: + def augment_ragged_image(self, image, transformation, **kwargs): + return self.augment_images( + images=image, transformations=transformation, **kwargs + ) + + def augment_images(self, images, transformations, **kwargs): + rank = images.shape.rank + if rank != 4: raise ValueError( - "Expected the input image to be rank 3. Got " - f"inputs.shape = {image.shape}" + "Expected the input image to be rank 4. 
Got " + f"inputs.shape = {images.shape}" ) - rgb_delta = tf.cast(rgb_delta, image.dtype) - image += rgb_delta - return tf.clip_by_value(image, self.value_range[0], self.value_range[1]) + rgb_deltas = tf.cast(transformations, images.dtype) + images += rgb_deltas + return tf.clip_by_value(images, self.value_range[0], self.value_range[1]) + + def augment_labels(self, labels, transformations, **kwargs): + return labels + + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + return segmentation_masks + + def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): + return bounding_boxes def get_config(self): config = { @@ -102,3 +112,9 @@ def get_config(self): } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + if isinstance(config["factor"], dict): + config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_color_jitter.py b/keras_cv/layers/preprocessing/random_color_jitter.py index 48acd4d3d0..6b2051bb23 100644 --- a/keras_cv/layers/preprocessing/random_color_jitter.py +++ b/keras_cv/layers/preprocessing/random_color_jitter.py @@ -1,4 +1,4 @@ -# Copyright 2022 The KerasCV Authors +# Copyright 2023 The KerasCV Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,14 +15,14 @@ import tensorflow as tf from keras_cv.layers import preprocessing -from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( - BaseImageAugmentationLayer, +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, ) from keras_cv.utils import preprocessing as preprocessing_utils @tf.keras.utils.register_keras_serializable(package="keras_cv") -class RandomColorJitter(BaseImageAugmentationLayer): +class RandomColorJitter(VectorizedBaseImageAugmentationLayer): """RandomColorJitter class randomly apply brightness, contrast, saturation and hue image processing operation sequentially and randomly on the input. It expects input as RGB image. 
The expected image should be @@ -113,33 +113,38 @@ def __init__( factor=self.hue_factor, value_range=(0, 255), seed=self.seed ) - def augment_image(self, image, transformation=None, **kwargs): - image = preprocessing_utils.transform_value_range( - image, + def augment_ragged_image(self, image, transformation, **kwargs): + return self.augment_images( + images=image, transformations=transformation, **kwargs + ) + + def augment_images(self, images, transformations=None, **kwargs): + images = preprocessing_utils.transform_value_range( + images, original_range=self.value_range, target_range=(0, 255), dtype=self.compute_dtype, ) - image = self.random_brightness(image) - image = self.random_contrast(image) - image = self.random_saturation(image) - image = self.random_hue(image) - image = preprocessing_utils.transform_value_range( - image, + images = self.random_brightness(images) + images = self.random_contrast(images) + images = self.random_saturation(images) + images = self.random_hue(images) + images = preprocessing_utils.transform_value_range( + images, original_range=(0, 255), target_range=self.value_range, dtype=self.compute_dtype, ) - return image + return images - def augment_bounding_boxes(self, bounding_boxes, **kwargs): - return bounding_boxes + def augment_labels(self, labels, transformations, **kwargs): + return labels - def augment_label(self, label, transformation=None, **kwargs): - return label + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + return segmentation_masks - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): - return segmentation_mask + def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): + return bounding_boxes def get_config(self): config = super().get_config() @@ -154,3 +159,7 @@ def get_config(self): } ) return config + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_contrast.py b/keras_cv/layers/preprocessing/random_contrast.py index 25b1c58e11..b149a9fce0 100644 --- a/keras_cv/layers/preprocessing/random_contrast.py +++ b/keras_cv/layers/preprocessing/random_contrast.py @@ -1,4 +1,4 @@ -# Copyright 2022 The KerasCV Authors +# Copyright 2023 The KerasCV Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,14 +14,14 @@ import tensorflow as tf -from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( - BaseImageAugmentationLayer, +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, ) -from keras_cv.utils import preprocessing +from keras_cv.utils import preprocessing as preprocessing_utils @tf.keras.utils.register_keras_serializable(package="keras_cv") -class RandomContrast(BaseImageAugmentationLayer): +class RandomContrast(VectorizedBaseImageAugmentationLayer): """RandomContrast randomly adjusts contrast during training. This layer will randomly adjust the contrast of an image or images by a @@ -52,6 +52,13 @@ class RandomContrast(BaseImageAugmentationLayer): the output will be `(x - mean) * factor + mean` where `mean` is the mean value of the channel. seed: Integer. Used to create a random seed. 
+ + Usage: + ```python + (images, labels), _ = tf.keras.datasets.cifar10.load_data() + random_contrast = keras_cv.layers.preprocessing.RandomContrast() + augmented_images = random_contrast(images) + ``` """ def __init__(self, factor, seed=None, **kwargs): @@ -63,26 +70,36 @@ def __init__(self, factor, seed=None, **kwargs): min = 1 - factor max = 1 + factor self.factor_input = factor - self.factor = preprocessing.parse_factor((min, max), min_value=-1, max_value=2) + self.factor = preprocessing_utils.parse_factor( + (min, max), min_value=-1, max_value=2 + ) self.seed = seed - def get_random_transformation(self, **kwargs): - return self.factor() + def get_random_transformation_batch(self, batch_size, **kwargs): + return self.factor(shape=(batch_size,)) + + def augment_ragged_image(self, image, transformation, **kwargs): + return self.augment_images( + images=image, transformations=transformation, **kwargs + ) - def augment_image(self, image, transformation, **kwargs): - contrast_factor = transformation - output = tf.image.adjust_contrast(image, contrast_factor=contrast_factor) - output = tf.clip_by_value(output, 0, 255) - output.set_shape(image.shape) - return output + def augment_images(self, images, transformations, **kwargs): + contrast_factors = tf.cast(transformations, dtype=images.dtype) + # broadcast + contrast_factors = contrast_factors[..., tf.newaxis, tf.newaxis, tf.newaxis] + means = tf.reduce_mean(images, axis=(1, 2), keepdims=True) - def augment_label(self, label, transformation, **kwargs): - return label + images = (images - means) * contrast_factors + means + images = tf.clip_by_value(images, 0, 255) + return images - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): - return segmentation_mask + def augment_labels(self, labels, transformations, **kwargs): + return labels - def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + return segmentation_masks + + def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): return bounding_boxes def get_config(self): @@ -92,3 +109,7 @@ def get_config(self): } base_config = super().get_config() return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_hue.py b/keras_cv/layers/preprocessing/random_hue.py index 221ffd09b7..297d616112 100644 --- a/keras_cv/layers/preprocessing/random_hue.py +++ b/keras_cv/layers/preprocessing/random_hue.py @@ -1,4 +1,4 @@ -# Copyright 2022 The KerasCV Authors +# Copyright 2023 The KerasCV Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,14 +13,14 @@ # limitations under the License. import tensorflow as tf -from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( - BaseImageAugmentationLayer, +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, ) -from keras_cv.utils import preprocessing +from keras_cv.utils import preprocessing as preprocessing_utils @tf.keras.utils.register_keras_serializable(package="keras_cv") -class RandomHue(BaseImageAugmentationLayer): +class RandomHue(VectorizedBaseImageAugmentationLayer): """Randomly adjusts the hue on given images. 
This layer will randomly increase/reduce the hue for the input RGB @@ -45,45 +45,65 @@ class RandomHue(BaseImageAugmentationLayer): on how your preprocessing pipeline is setup. seed: Integer. Used to create a random seed. + Usage: + ```python + (images, labels), _ = tf.keras.datasets.cifar10.load_data() + random_hue = keras_cv.layers.preprocessing.RandomHue() + augmented_images = random_hue(images) + ``` """ def __init__(self, factor, value_range, seed=None, **kwargs): super().__init__(seed=seed, **kwargs) - self.factor = preprocessing.parse_factor( + self.factor = preprocessing_utils.parse_factor( factor, ) self.value_range = value_range self.seed = seed - def get_random_transformation(self, **kwargs): - invert = preprocessing.random_inversion(self._random_generator) + def get_random_transformation_batch(self, batch_size, **kwargs): + invert = self._random_generator.random_uniform((batch_size,), 0, 1, tf.float32) + invert = tf.where(invert > 0.5, -tf.ones_like(invert), tf.ones_like(invert)) # We must scale self.factor() to the range [-0.5, 0.5]. This is because the # tf.image operation performs rotation on the hue saturation value orientation. # This can be thought of as an angle in the range [-180, 180] - return invert * self.factor() * 0.5 + return invert * self.factor(shape=(batch_size,)) * 0.5 - def augment_image(self, image, transformation=None, **kwargs): - image = preprocessing.transform_value_range( - image, self.value_range, (0, 1), dtype=self.compute_dtype + def augment_ragged_image(self, image, transformation, **kwargs): + return self.augment_images( + images=image, transformations=transformation, **kwargs ) + def augment_images(self, images, transformations, **kwargs): + images = preprocessing_utils.transform_value_range( + images, self.value_range, (0, 1), dtype=self.compute_dtype + ) + adjust_factors = tf.cast(transformations, images.dtype) + # broadcast + adjust_factors = adjust_factors[..., tf.newaxis, tf.newaxis] + # tf.image.adjust_hue expects floats to be in range [0, 1] - image = tf.image.adjust_hue(image, delta=transformation) + images = tf.image.rgb_to_hsv(images) + h_channel = images[..., 0] + adjust_factors + h_channel = tf.where(h_channel > 1.0, h_channel - 1.0, h_channel) + h_channel = tf.where(h_channel < 0.0, h_channel + 1.0, h_channel) + images = tf.stack([h_channel, images[..., 1], images[..., 2]], axis=-1) + images = tf.image.hsv_to_rgb(images) # RandomHue is one of the rare KPLs that needs to clip - image = tf.clip_by_value(image, 0, 1) - image = preprocessing.transform_value_range( - image, (0, 1), self.value_range, dtype=self.compute_dtype + images = tf.clip_by_value(images, 0, 1) + images = preprocessing_utils.transform_value_range( + images, (0, 1), self.value_range, dtype=self.compute_dtype ) - return image + return images - def augment_bounding_boxes(self, bounding_boxes, **kwargs): - return bounding_boxes + def augment_labels(self, labels, transformations, **kwargs): + return labels - def augment_label(self, label, transformation=None, **kwargs): - return label + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + return segmentation_masks - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): - return segmentation_mask + def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): + return bounding_boxes def get_config(self): config = { @@ -93,3 +113,9 @@ def get_config(self): } base_config = super().get_config() return dict(list(base_config.items()) + 
list(config.items())) + + @classmethod + def from_config(cls, config): + if isinstance(config["factor"], dict): + config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_hue_test.py b/keras_cv/layers/preprocessing/random_hue_test.py index 4573016096..18c1504058 100644 --- a/keras_cv/layers/preprocessing/random_hue_test.py +++ b/keras_cv/layers/preprocessing/random_hue_test.py @@ -48,8 +48,12 @@ def test_adjust_full_opposite_hue(self): channel_min = tf.math.reduce_min(output, axis=-1) # Make sure the max and min channel are the same between input and output # In the meantime, and channel will swap between each other. - self.assertAllClose(channel_max, tf.math.reduce_max(image, axis=-1)) - self.assertAllClose(channel_min, tf.math.reduce_min(image, axis=-1)) + self.assertAllClose( + channel_max, tf.math.reduce_max(image, axis=-1), atol=1e-5, rtol=1e-5 + ) + self.assertAllClose( + channel_min, tf.math.reduce_min(image, axis=-1), atol=1e-5, rtol=1e-5 + ) @parameterized.named_parameters( ("025", 0.25), ("05", 0.5), ("075", 0.75), ("100", 1.0) From 37d34bf7380f54985674d64911fe9efa23a6350a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Szyma=C5=84ski?= Date: Fri, 17 Feb 2023 19:03:34 +0100 Subject: [PATCH 21/27] Added vectorized Auto Contrast implementation. (#1394) * Added vectorized Auto Contrast implementation. * Fixed typos in fn signatures. * Moved consistency test to benchmarks. * Override random transformation. * Lint code. * Added back the default get_random_transformation_batch. --- benchmarks/vectorized_auto_contrast.py | 174 ++++++++++++++++++ .../layers/preprocessing/auto_contrast.py | 45 +++-- 2 files changed, 201 insertions(+), 18 deletions(-) create mode 100644 benchmarks/vectorized_auto_contrast.py diff --git a/benchmarks/vectorized_auto_contrast.py b/benchmarks/vectorized_auto_contrast.py new file mode 100644 index 0000000000..acd18fce83 --- /dev/null +++ b/benchmarks/vectorized_auto_contrast.py @@ -0,0 +1,174 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import matplotlib.pyplot as plt +import tensorflow as tf +import tensorflow.keras as keras + +from keras_cv.layers import AutoContrast +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) +from keras_cv.utils import preprocessing + + +class OldAutoContrast(BaseImageAugmentationLayer): + """Performs the AutoContrast operation on an image. + + Auto contrast stretches the values of an image across the entire available + `value_range`. This makes differences between pixels more obvious. An example of + this is if an image only has values `[0, 1]` out of the range `[0, 255]`, auto + contrast will change the `1` values to be `255`. + + Args: + value_range: the range of values the incoming images will have. + Represented as a two number tuple written [low, high]. 
+ This is typically either `[0, 1]` or `[0, 255]` depending + on how your preprocessing pipeline is setup. + """ + + def __init__( + self, + value_range, + **kwargs, + ): + super().__init__(**kwargs) + self.value_range = value_range + + def augment_image(self, image, transformation=None, **kwargs): + original_image = image + image = preprocessing.transform_value_range( + image, + original_range=self.value_range, + target_range=(0, 255), + dtype=self.compute_dtype, + ) + + low = tf.reduce_min(tf.reduce_min(image, axis=0), axis=0) + high = tf.reduce_max(tf.reduce_max(image, axis=0), axis=0) + scale = 255.0 / (high - low) + offset = -low * scale + + image = image * scale[None, None] + offset[None, None] + result = tf.clip_by_value(image, 0.0, 255.0) + result = preprocessing.transform_value_range( + result, + original_range=(0, 255), + target_range=self.value_range, + dtype=self.compute_dtype, + ) + # don't process NaN channels + result = tf.where(tf.math.is_nan(result), original_image, result) + return result + + def augment_bounding_boxes(self, bounding_boxes, **kwargs): + return bounding_boxes + + def augment_label(self, label, transformation=None, **kwargs): + return label + + def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + return segmentation_mask + + def get_config(self): + config = super().get_config() + config.update({"value_range": self.value_range}) + return config + + +class AutoContrastConsistencyTest(tf.test.TestCase): + def test_consistency_with_old_implementation(self): + images = tf.random.uniform(shape=(16, 32, 32, 3)) + + output = AutoContrast(value_range=(0, 1))(images) + old_output = OldAutoContrast(value_range=(0, 1))(images) + + self.assertAllClose(old_output, output) + + +if __name__ == "__main__": + (x_train, _), _ = keras.datasets.cifar10.load_data() + x_train = x_train.astype(float) + + images = [] + num_images = [1000, 2000, 5000, 10000] + results = {} + + for aug in [AutoContrast, OldAutoContrast]: + c = aug.__name__ + + layer = aug(value_range=(0, 255)) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1 - t0}") + + results[c] = runtimes + + c = aug.__name__ + " Graph Mode" + + layer = aug(value_range=(0, 255)) + + @tf.function() + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1 - t0}") + + results[c] = runtimes + + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.show() + + # So we can actually see more relevant margins + del results["OldAutoContrast"] + + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.show() + + # Compare two implementations + tf.test.main() diff --git a/keras_cv/layers/preprocessing/auto_contrast.py b/keras_cv/layers/preprocessing/auto_contrast.py index e4b70b1d2c..9d36d67742 100644 --- a/keras_cv/layers/preprocessing/auto_contrast.py +++ b/keras_cv/layers/preprocessing/auto_contrast.py @@ -14,18 
+14,18 @@ import tensorflow as tf -from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( - BaseImageAugmentationLayer, +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, ) from keras_cv.utils import preprocessing @tf.keras.utils.register_keras_serializable(package="keras_cv") -class AutoContrast(BaseImageAugmentationLayer): +class AutoContrast(VectorizedBaseImageAugmentationLayer): """Performs the AutoContrast operation on an image. Auto contrast stretches the values of an image across the entire available - `value_range`. This makes differences between pixels more obvious. An example of + `value_range`. This makes differences between pixels more obvious. An example of this is if an image only has values `[0, 1]` out of the range `[0, 255]`, auto contrast will change the `1` values to be `255`. @@ -33,7 +33,7 @@ class AutoContrast(BaseImageAugmentationLayer): value_range: the range of values the incoming images will have. Represented as a two number tuple written [low, high]. This is typically either `[0, 1]` or `[0, 255]` depending - on how your preprocessing pipeline is setup. + on how your preprocessing pipeline is set up. """ def __init__( @@ -44,22 +44,22 @@ def __init__( super().__init__(**kwargs) self.value_range = value_range - def augment_image(self, image, transformation=None, **kwargs): - original_image = image - image = preprocessing.transform_value_range( - image, + def augment_images(self, images, transformations=None, **kwargs): + original_images = images + images = preprocessing.transform_value_range( + images, original_range=self.value_range, target_range=(0, 255), dtype=self.compute_dtype, ) - low = tf.reduce_min(tf.reduce_min(image, axis=0), axis=0) - high = tf.reduce_max(tf.reduce_max(image, axis=0), axis=0) + low = tf.reduce_min(images, axis=(1, 2), keepdims=True) + high = tf.reduce_max(images, axis=(1, 2), keepdims=True) scale = 255.0 / (high - low) offset = -low * scale - image = image * scale[None, None] + offset[None, None] - result = tf.clip_by_value(image, 0.0, 255.0) + images = images * scale + offset + result = tf.clip_by_value(images, 0.0, 255.0) result = preprocessing.transform_value_range( result, original_range=(0, 255), @@ -67,17 +67,26 @@ def augment_image(self, image, transformation=None, **kwargs): dtype=self.compute_dtype, ) # don't process NaN channels - result = tf.where(tf.math.is_nan(result), original_image, result) + result = tf.where(tf.math.is_nan(result), original_images, result) return result def augment_bounding_boxes(self, bounding_boxes, **kwargs): return bounding_boxes - def augment_label(self, label, transformation=None, **kwargs): - return label + def augment_labels(self, labels, transformations=None, **kwargs): + return labels - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): - return segmentation_mask + def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + return segmentation_masks + + def augment_keypoints(self, keypoints, transformations, **kwargs): + return keypoints + + def augment_targets(self, targets, transformations, **kwargs): + return targets + + def augment_ragged_image(self, image, transformation, **kwargs): + return self.augment_images(image, transformations=transformation, **kwargs) def get_config(self): config = super().get_config() From 34a82e5752a6bb40378add1d12072a0462f04641 Mon Sep 17 00:00:00 2001 From: Jonathan Bischof Date: Fri, 17 Feb 2023 22:07:49 +0000 
Subject: [PATCH 22/27] Remove closures and use subclassed functional model (#1401) * Replace closures and use subclassed model * Convert `ResNetv2` to `keras.Model` * Clean up docstrings * Respond to comments * Respond to comments 2 * Format * Fix typo * Fix linter error * Fix arg default --- keras_cv/models/resnet_v2.py | 534 +++++++++++++++++------------- keras_cv/models/resnet_v2_test.py | 17 +- 2 files changed, 311 insertions(+), 240 deletions(-) diff --git a/keras_cv/models/resnet_v2.py b/keras_cv/models/resnet_v2.py index 39f2fcd227..ef40a5a806 100644 --- a/keras_cv/models/resnet_v2.py +++ b/keras_cv/models/resnet_v2.py @@ -20,6 +20,7 @@ import types import tensorflow as tf +from tensorflow import keras from tensorflow.keras import backend from tensorflow.keras import layers @@ -55,6 +56,7 @@ } BN_AXIS = 3 +BN_EPSILON = 1.001e-5 BASE_DOCSTRING = """Instantiates the {name} architecture. Reference: @@ -69,13 +71,15 @@ For transfer learning use cases, make sure to read the [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). + Args: - include_rescaling: whether or not to Rescale the inputs.If set to True, - inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: whether to include the fully-connected layer at the top of the - network. If provided, classes must be provided. - classes: optional number of classes to classify images into, only to be - specified if `include_top` is True. + include_rescaling: bool, whether or not to Rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + include_top: bool, whether to include the fully-connected layer at + the top of the network. If provided, `classes` must be provided. + classes: optional int, number of classes to classify images into (only + to be specified if `include_top` is `True`). weights: one of `None` (random initialization), a pretrained weight file path, or a reference to pre-trained weights (e.g. 'imagenet/classification') (see available pre-trained weights in weights.py) @@ -94,214 +98,225 @@ classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. + Returns: A `keras.Model` instance. """ -def BasicBlock( - filters, kernel_size=3, stride=1, dilation=1, conv_shortcut=False, name=None +def apply_basic_block( + x, filters, kernel_size=3, stride=1, dilation=1, conv_shortcut=False, name=None ): """A basic residual block (v2). + Args: - filters: integer, filters of the basic layer. - kernel_size: default 3, kernel size of the bottleneck layer. - stride: default 1, stride of the first layer. - conv_shortcut: default False, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. + x: input tensor. + filters: int, filters of the basic layer. + kernel_size: int, kernel size of the bottleneck layer. Defaults to 3. + stride: int, stride of the first layer. Defaults to 1. + dilation: int, the dilation rate to use for dilated convolution. + Defaults to 1. + conv_shortcut: bool, uses convolution shortcut if `True`. If `False` + (default), uses identity or pooling shortcut, based on stride. + Returns: Output tensor for the residual block. 
""" + if name is None: name = f"v2_basic_block_{backend.get_uid('v2_basic_block')}" - def apply(x): - use_preactivation = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_use_preactivation_bn" - )(x) - - use_preactivation = layers.Activation( - "relu", name=name + "_use_preactivation_relu" - )(use_preactivation) + use_preactivation = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_use_preactivation_bn" + )(x) - s = stride if dilation == 1 else 1 - if conv_shortcut: - shortcut = layers.Conv2D(filters, 1, strides=s, name=name + "_0_conv")( - use_preactivation - ) - else: - shortcut = ( - layers.MaxPooling2D(1, strides=stride, name=name + "_0_max_pooling")(x) - if s > 1 - else x - ) + use_preactivation = layers.Activation( + "relu", name=name + "_use_preactivation_relu" + )(use_preactivation) - x = layers.Conv2D( - filters, - kernel_size, - padding="SAME", - strides=1, - use_bias=False, - name=name + "_1_conv", - )(use_preactivation) - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_1_bn" - )(x) - x = layers.Activation("relu", name=name + "_1_relu")(x) + s = stride if dilation == 1 else 1 + if conv_shortcut: + shortcut = layers.Conv2D(filters, 1, strides=s, name=name + "_0_conv")( + use_preactivation + ) + else: + shortcut = ( + layers.MaxPooling2D(1, strides=stride, name=name + "_0_max_pooling")(x) + if s > 1 + else x + ) - x = layers.Conv2D( - filters, - kernel_size, - strides=s, - padding="same", - dilation_rate=dilation, - use_bias=False, - name=name + "_2_conv", - )(x) + x = layers.Conv2D( + filters, + kernel_size, + padding="SAME", + strides=1, + use_bias=False, + name=name + "_1_conv", + )(use_preactivation) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_1_bn" + )(x) + x = layers.Activation("relu", name=name + "_1_relu")(x) - x = layers.Add(name=name + "_out")([shortcut, x]) - return x + x = layers.Conv2D( + filters, + kernel_size, + strides=s, + padding="same", + dilation_rate=dilation, + use_bias=False, + name=name + "_2_conv", + )(x) - return apply + x = layers.Add(name=name + "_out")([shortcut, x]) + return x -def Block(filters, kernel_size=3, stride=1, dilation=1, conv_shortcut=False, name=None): +def apply_block( + x, filters, kernel_size=3, stride=1, dilation=1, conv_shortcut=False, name=None +): """A residual block (v2). + Args: - filters: integer, filters of the bottleneck layer. - kernel_size: default 3, kernel size of the bottleneck layer. - stride: default 1, stride of the first layer. - conv_shortcut: default False, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. + x: input tensor. + filters: int, filters of the basic layer. + kernel_size: int, kernel size of the bottleneck layer. Defaults to 3. + stride: int, stride of the first layer. Defaults to 1. + dilation: int, the dilation rate to use for dilated convolution. + Defaults to 1. + conv_shortcut: bool, uses convolution shortcut if `True`. If `False` + (default), uses identity or pooling shortcut, based on stride. + Returns: Output tensor for the residual block. 
""" if name is None: name = f"v2_block_{backend.get_uid('v2_block')}" - def apply(x): - use_preactivation = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_use_preactivation_bn" - )(x) - - use_preactivation = layers.Activation( - "relu", name=name + "_use_preactivation_relu" - )(use_preactivation) + use_preactivation = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_use_preactivation_bn" + )(x) - s = stride if dilation == 1 else 1 - if conv_shortcut: - shortcut = layers.Conv2D( - 4 * filters, - 1, - strides=s, - name=name + "_0_conv", - )(use_preactivation) - else: - shortcut = ( - layers.MaxPooling2D(1, strides=stride, name=name + "_0_max_pooling")(x) - if s > 1 - else x - ) + use_preactivation = layers.Activation( + "relu", name=name + "_use_preactivation_relu" + )(use_preactivation) - x = layers.Conv2D(filters, 1, strides=1, use_bias=False, name=name + "_1_conv")( - use_preactivation + s = stride if dilation == 1 else 1 + if conv_shortcut: + shortcut = layers.Conv2D( + 4 * filters, + 1, + strides=s, + name=name + "_0_conv", + )(use_preactivation) + else: + shortcut = ( + layers.MaxPooling2D(1, strides=stride, name=name + "_0_max_pooling")(x) + if s > 1 + else x ) - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_1_bn" - )(x) - x = layers.Activation("relu", name=name + "_1_relu")(x) - x = layers.Conv2D( - filters, - kernel_size, - strides=s, - use_bias=False, - padding="same", - dilation_rate=dilation, - name=name + "_2_conv", - )(x) - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_2_bn" - )(x) - x = layers.Activation("relu", name=name + "_2_relu")(x) + x = layers.Conv2D(filters, 1, strides=1, use_bias=False, name=name + "_1_conv")( + use_preactivation + ) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_1_bn" + )(x) + x = layers.Activation("relu", name=name + "_1_relu")(x) - x = layers.Conv2D(4 * filters, 1, name=name + "_3_conv")(x) - x = layers.Add(name=name + "_out")([shortcut, x]) - return x + x = layers.Conv2D( + filters, + kernel_size, + strides=s, + use_bias=False, + padding="same", + dilation_rate=dilation, + name=name + "_2_conv", + )(x) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_2_bn" + )(x) + x = layers.Activation("relu", name=name + "_2_relu")(x) - return apply + x = layers.Conv2D(4 * filters, 1, name=name + "_3_conv")(x) + x = layers.Add(name=name + "_out")([shortcut, x]) + return x -def Stack( +def apply_stack( + x, filters, blocks, stride=2, dilations=1, name=None, - block_fn=Block, + block_type="block", first_shortcut=True, stack_index=1, ): """A set of stacked blocks. + Args: - filters: integer, filters of the layer in a block. - blocks: integer, blocks in the stacked blocks. - stride: default 2, stride of the first layer in the first block. - name: string, stack label. - block_fn: callable, `Block` or `BasicBlock`, the block function to stack. - first_shortcut: default True, use convolution shortcut if True, - otherwise identity shortcut. + x: input tensor. + filters: int, filters of the layer in a block. + blocks: int, blocks in the stacked blocks. + stride: int, stride of the first layer in the first block. Defaults to 2. + dilation: int, the dilation rate to use for dilated convolution. + Defaults to 1. + block_type: string, one of "basic_block" or "block". The block type to + stack. Use "basic_block" for ResNet18 and ResNet34. + first_shortcut: bool. 
Use convolution shortcut if `True` (default), + otherwise uses identity or pooling shortcut, based on stride. + Returns: Output tensor for the stacked blocks. """ + if name is None: name = f"v2_stack_{stack_index}" - def apply(x): - x = block_fn(filters, conv_shortcut=first_shortcut, name=name + "_block1")(x) - for i in range(2, blocks): - x = block_fn(filters, dilation=dilations, name=name + "_block" + str(i))(x) - x = block_fn( - filters, - stride=stride, - dilation=dilations, - name=name + "_block" + str(blocks), - )(x) - return x + if block_type == "basic_block": + block_fn = apply_basic_block + elif block_type == "block": + block_fn = apply_block + else: + raise ValueError( + """`block_type` must be either "basic_block" or "block". """ + f"Received block_type={block_type}." + ) - return apply + x = block_fn(x, filters, conv_shortcut=first_shortcut, name=name + "_block1") + for i in range(2, blocks): + x = block_fn(x, filters, dilation=dilations, name=name + "_block" + str(i)) + x = block_fn( + x, + filters, + stride=stride, + dilation=dilations, + name=name + "_block" + str(blocks), + ) + return x -def ResNetV2( - stackwise_filters, - stackwise_blocks, - stackwise_strides, - include_rescaling, - include_top, - stackwise_dilations=None, - name="ResNetV2", - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classes=None, - classifier_activation="softmax", - block_fn=Block, - **kwargs, -): +@keras.utils.register_keras_serializable(package="keras_cv.models") +class ResNetV2(keras.Model): """Instantiates the ResNetV2 architecture. Args: - stackwise_filters: number of filters for each stack in the model. - stackwise_blocks: number of blocks for each stack in the model. - stackwise_strides: stride for each stack in the model. - include_rescaling: whether or not to Rescale the inputs. If set to True, - inputs will be passed through a `Rescaling(1/255.0)` layer. - name: string, model name. - include_top: whether to include the fully-connected + stackwise_filters: list of ints, number of filters for each stack in + the model. + stackwise_blocks: list of ints, number of blocks for each stack in the + model. + stackwise_strides: list of ints, stride for each stack in the model. + include_rescaling: bool, whether or not to Rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + include_top: bool, whether to include the fully-connected layer at the top of the network. + stackwise_dialations: list of ints, dialation for each stack in the + model. If `None` (default), dialation will not be used. + name: string, model name. weights: one of `None` (random initialization), or the path to the weights file to be loaded. input_shape: optional shape tuple, defaults to (None, None, 3). @@ -320,95 +335,147 @@ def ResNetV2( be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - block_fn: callable, `Block` or `BasicBlock`, the block function to stack. - Use 'basic_block' for ResNet18 and ResNet34. - **kwargs: Pass-through keyword arguments to `tf.keras.Model`. - - Returns: - A `keras.Model` instance. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. 
Set + `classifier_activation=None` to return the logits of the "top" + layer. + block_type: string, one of "basic_block" or "block". The block type to + stack. Use "basic_block" for ResNet18 and ResNet34. """ - if weights and not tf.io.gfile.exists(weights): - raise ValueError( - "The `weights` argument should be either `None` or the path to the " - "weights file to be loaded. Weights file not found at location: {weights}" - ) - if include_top and not classes: - raise ValueError( - "If `include_top` is True, you should specify `classes`. " - f"Received: classes={classes}" - ) + def __init__( + self, + stackwise_filters, + stackwise_blocks, + stackwise_strides, + include_rescaling, + include_top, + stackwise_dilations=None, + weights=None, + input_shape=(None, None, 3), + input_tensor=None, + pooling=None, + classes=None, + classifier_activation="softmax", + block_type="block", + **kwargs, + ): + if weights and not tf.io.gfile.exists(weights): + raise ValueError( + "The `weights` argument should be either `None` or the path to the " + "weights file to be loaded. Weights file not found at location: {weights}" + ) - if include_top and pooling: - raise ValueError( - f"`pooling` must be `None` when `include_top=True`." - f"Received pooling={pooling} and include_top={include_top}. " - ) + if include_top and not classes: + raise ValueError( + "If `include_top` is True, you should specify `classes`. " + f"Received: classes={classes}" + ) - inputs = utils.parse_model_inputs(input_shape, input_tensor) - x = inputs + if include_top and pooling: + raise ValueError( + f"`pooling` must be `None` when `include_top=True`." + f"Received pooling={pooling} and include_top={include_top}. " + ) - if include_rescaling: - x = layers.Rescaling(1 / 255.0)(x) + inputs = utils.parse_model_inputs(input_shape, input_tensor) + x = inputs - x = layers.Conv2D( - 64, - 7, - strides=2, - use_bias=True, - padding="same", - name="conv1_conv", - )(x) + if include_rescaling: + x = layers.Rescaling(1 / 255.0)(x) - x = layers.MaxPooling2D(3, strides=2, padding="same", name="pool1_pool")(x) - - num_stacks = len(stackwise_filters) - if stackwise_dilations is None: - stackwise_dilations = [1] * num_stacks - - stack_level_outputs = {} - for stack_index in range(num_stacks): - x = Stack( - filters=stackwise_filters[stack_index], - blocks=stackwise_blocks[stack_index], - stride=stackwise_strides[stack_index], - dilations=stackwise_dilations[stack_index], - block_fn=block_fn, - first_shortcut=block_fn == Block or stack_index > 0, - stack_index=stack_index, + x = layers.Conv2D( + 64, + 7, + strides=2, + use_bias=True, + padding="same", + name="conv1_conv", )(x) - stack_level_outputs[stack_index + 2] = x - x = layers.BatchNormalization(axis=BN_AXIS, epsilon=1.001e-5, name="post_bn")(x) - x = layers.Activation("relu", name="post_relu")(x) + x = layers.MaxPooling2D(3, strides=2, padding="same", name="pool1_pool")(x) + + num_stacks = len(stackwise_filters) + if stackwise_dilations is None: + stackwise_dilations = [1] * num_stacks + + stack_level_outputs = {} + for stack_index in range(num_stacks): + x = apply_stack( + x, + filters=stackwise_filters[stack_index], + blocks=stackwise_blocks[stack_index], + stride=stackwise_strides[stack_index], + dilations=stackwise_dilations[stack_index], + block_type=block_type, + first_shortcut=(block_type == "block" or stack_index > 0), + stack_index=stack_index, + ) + stack_level_outputs[stack_index + 2] = x - if include_top: - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - x = 
layers.Dense(classes, activation=classifier_activation, name="predictions")( + x = layers.BatchNormalization(axis=BN_AXIS, epsilon=BN_EPSILON, name="post_bn")( x ) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D(name="max_pool")(x) - - # Create model. - model = tf.keras.Model(inputs, x, name=name, **kwargs) - - if weights is not None: - model.load_weights(weights) - # Set this private attribute for recreate backbone model with outputs at each of the - # resolution level. - model._backbone_level_outputs = stack_level_outputs + x = layers.Activation("relu", name="post_relu")(x) - # Bind the `to_backbone_model` method to the application model. - model.as_backbone = types.MethodType(utils.as_backbone, model) - - return model + if include_top: + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D(name="max_pool")(x) + + # Create model. + super().__init__(inputs=inputs, outputs=x, **kwargs) + + # All references to `self` below this line + if weights is not None: + self.load_weights(weights) + # Set this private attribute for recreate backbone model with outputs at + # each resolution level. + self._backbone_level_outputs = stack_level_outputs + + # Bind the `to_backbone_model` method to the application model. + self.as_backbone = types.MethodType(utils.as_backbone, self) + + self.stackwise_filters = stackwise_filters + self.stackwise_blocks = stackwise_blocks + self.stackwise_strides = stackwise_strides + self.include_rescaling = include_rescaling + self.include_top = include_top + self.stackwise_dilations = stackwise_dilations + self.input_tensor = input_tensor + self.pooling = pooling + self.classes = classes + self.classifier_activation = classifier_activation + self.block_type = block_type + + def get_config(self): + return { + "stackwise_filters": self.stackwise_filters, + "stackwise_blocks": self.stackwise_blocks, + "stackwise_strides": self.stackwise_strides, + "include_rescaling": self.include_rescaling, + "include_top": self.include_top, + # Remove batch dimension from `input_shape` + "input_shape": self.input_shape[1:], + "stackwise_dilations": self.stackwise_dilations, + "input_tensor": self.input_tensor, + "pooling": self.pooling, + "classes": self.classes, + "classifier_activation": self.classifier_activation, + "block_type": self.block_type, + "name": self.name, + "trainable": self.trainable, + } + + @classmethod + def from_config(cls, config): + return cls(**config) def ResNet18V2( @@ -439,7 +506,7 @@ def ResNet18V2( pooling=pooling, classes=classes, classifier_activation=classifier_activation, - block_fn=BasicBlock, + block_type="basic_block", **kwargs, ) @@ -472,7 +539,7 @@ def ResNet34V2( pooling=pooling, classes=classes, classifier_activation=classifier_activation, - block_fn=BasicBlock, + block_type="basic_block", **kwargs, ) @@ -505,6 +572,7 @@ def ResNet50V2( pooling=pooling, classes=classes, classifier_activation=classifier_activation, + block_type="block", **kwargs, ) @@ -536,6 +604,7 @@ def ResNet101V2( pooling=pooling, classes=classes, classifier_activation=classifier_activation, + block_type="block", **kwargs, ) @@ -567,6 +636,7 @@ def ResNet152V2( pooling=pooling, classes=classes, classifier_activation=classifier_activation, + 
block_type="block", **kwargs, ) diff --git a/keras_cv/models/resnet_v2_test.py b/keras_cv/models/resnet_v2_test.py index d890c1241f..545d6a0039 100644 --- a/keras_cv/models/resnet_v2_test.py +++ b/keras_cv/models/resnet_v2_test.py @@ -14,6 +14,7 @@ import tensorflow as tf from absl.testing import parameterized +from packaging import version from keras_cv.models import resnet_v2 @@ -51,17 +52,17 @@ def test_application_variable_input_channels(self, app, last_dim, args): super()._test_application_variable_input_channels(app, last_dim, args) @parameterized.parameters(*MODEL_LIST) - def test_model_can_be_used_as_backbone(self, app, last_dim, args): - super()._test_model_can_be_used_as_backbone(app, last_dim, args) - - @parameterized.parameters(*MODEL_LIST) - def test_model_serialization(self, app, last_dim, args): + def test_model_serialization_tf(self, app, last_dim, args): super()._test_model_serialization( app, last_dim, args, save_format="tf", filename="model" ) - super()._test_model_serialization( - app, last_dim, args, save_format="keras_v3", filename="model.keras" - ) + + @parameterized.parameters(*MODEL_LIST) + def test_model_serialization_keras_format(self, app, last_dim, args): + if version.parse(tf.__version__) >= version.parse("2.12.0-dev0"): + super()._test_model_serialization( + app, last_dim, args, save_format="keras_v3", filename="model.keras" + ) def test_model_backbone_layer_names_stability(self): model = resnet_v2.ResNet50V2( From 15056344d08d234537fdf5b61d77c66c1fae691b Mon Sep 17 00:00:00 2001 From: Jonathan Bischof Date: Fri, 17 Feb 2023 22:46:47 +0000 Subject: [PATCH 23/27] Enforce line length of 80 chars (#1402) * Enforce line length of 80 chars * Add 80 char limit to `flake8` as well * Apply formatting to merged changes --- .../vectorization_strategy_benchmark.py | 4 +- benchmarks/vectorized_auto_contrast.py | 4 +- benchmarks/vectorized_grayscale.py | 4 +- benchmarks/vectorized_random_brightness.py | 8 +- benchmarks/vectorized_random_color_jitter.py | 4 +- benchmarks/vectorized_random_contrast.py | 12 +- benchmarks/vectorized_random_hue.py | 4 +- benchmarks/vectorized_random_saturation.py | 16 ++- build_deps/configure.py | 6 +- .../anchor_generator_configuration.py | 4 +- .../layers/object_detection/demo_utils.py | 9 +- .../preprocessing/bounding_box/demo_utils.py | 4 +- .../bounding_box/jittered_resize_demo.py | 7 +- .../bounding_box/resizing_demo.py | 5 +- .../classification/rand_augment_demo.py | 5 +- .../random_augmentation_pipeline_demo.py | 4 +- .../random_channel_shift_demo.py | 4 +- .../random_color_degeneration_demo.py | 4 +- .../classification/random_hue_demo.py | 4 +- .../stable_diffusion/text_to_image.py | 4 +- .../classification/imagenet/basic_training.py | 41 +++++-- .../contrastive/imagenet/simclr_training.py | 20 ++- .../pascal_voc/faster_rcnn.py | 34 ++++-- .../object_detection/pascal_voc/retina_net.py | 29 ++++- .../waymo/serialize_records.py | 12 +- .../waymo/train_pillars.py | 16 ++- .../pascal_voc/basic_training.py | 4 +- .../classification_training_benchmark_test.py | 13 +- keras_cv/bounding_box/__init__.py | 4 +- keras_cv/bounding_box/converters.py | 25 +++- keras_cv/bounding_box/converters_test.py | 25 +++- keras_cv/bounding_box/iou.py | 8 +- keras_cv/bounding_box/iou_test.py | 24 +++- .../mask_invalid_detections_test.py | 24 +++- keras_cv/bounding_box/to_dense.py | 8 +- keras_cv/bounding_box/to_ragged_test.py | 5 +- keras_cv/bounding_box/utils.py | 21 +++- keras_cv/bounding_box/utils_test.py | 8 +- keras_cv/bounding_box/validate_format.py | 4 
+- keras_cv/bounding_box/validate_format_test.py | 3 +- keras_cv/callbacks/__init__.py | 4 +- keras_cv/callbacks/pycoco_callback.py | 11 +- keras_cv/callbacks/pycoco_callback_test.py | 8 +- .../callbacks/waymo_evaluation_callback.py | 20 ++- .../waymo_evaluation_callback_test.py | 3 +- keras_cv/core/__init__.py | 12 +- .../normal_factor_sampler_test_.py | 8 +- .../factor_sampler/uniform_factor_sampler.py | 6 +- keras_cv/datasets/imagenet/load.py | 8 +- keras_cv/datasets/pascal_voc/segmentation.py | 44 +++++-- .../datasets/pascal_voc/segmentation_test.py | 61 +++++++--- keras_cv/datasets/waymo/load.py | 3 +- keras_cv/datasets/waymo/load_test.py | 6 +- keras_cv/datasets/waymo/transformer.py | 115 +++++++++++++----- keras_cv/datasets/waymo/transformer_test.py | 22 +++- keras_cv/keypoint/converters_test.py | 4 +- keras_cv/keypoint/utils_test.py | 8 +- keras_cv/layers/__init__.py | 28 +++-- keras_cv/layers/feature_pyramid.py | 4 +- keras_cv/layers/feature_pyramid_test.py | 40 ++++-- keras_cv/layers/fusedmbconv.py | 4 +- keras_cv/layers/fusedmbconv_test.py | 4 +- keras_cv/layers/mbconv.py | 8 +- .../object_detection/anchor_generator.py | 19 ++- .../layers/object_detection/box_matcher.py | 18 ++- .../object_detection/box_matcher_test.py | 12 +- .../multi_class_non_max_suppression_test.py | 18 ++- .../retina_net_label_encoder.py | 25 +++- .../retina_net_label_encoder_test.py | 5 +- keras_cv/layers/object_detection/roi_align.py | 80 +++++++++--- .../object_detection/roi_generator_test.py | 83 ++++++++++--- keras_cv/layers/object_detection/roi_pool.py | 17 ++- .../layers/object_detection/roi_pool_test.py | 79 +++++++++--- .../layers/object_detection/roi_sampler.py | 23 +++- .../object_detection/roi_sampler_test.py | 52 ++++++-- .../object_detection/rpn_label_encoder.py | 28 +++-- .../rpn_label_encoder_test.py | 14 ++- keras_cv/layers/object_detection/sampling.py | 4 +- .../layers/object_detection/sampling_test.py | 96 +++++++++++++-- .../center_net_label_encoder.py | 63 +++++++--- .../object_detection_3d/heatmap_decoder.py | 16 ++- .../layers/object_detection_3d/voxel_utils.py | 29 +++-- .../object_detection_3d/voxel_utils_test.py | 8 +- .../object_detection_3d/voxelization.py | 24 +++- .../object_detection_3d/voxelization_test.py | 8 +- keras_cv/layers/preprocessing/__init__.py | 20 ++- keras_cv/layers/preprocessing/aug_mix.py | 26 ++-- keras_cv/layers/preprocessing/augmenter.py | 4 +- .../layers/preprocessing/auto_contrast.py | 8 +- .../preprocessing/auto_contrast_test.py | 3 +- .../base_image_augmentation_layer.py | 40 ++++-- .../base_image_augmentation_layer_test.py | 44 +++++-- .../layers/preprocessing/channel_shuffle.py | 8 +- .../preprocessing/channel_shuffle_test.py | 4 +- keras_cv/layers/preprocessing/cut_mix.py | 8 +- keras_cv/layers/preprocessing/cut_mix_test.py | 8 +- keras_cv/layers/preprocessing/equalization.py | 10 +- keras_cv/layers/preprocessing/fourier_mix.py | 36 ++++-- .../layers/preprocessing/fourier_mix_test.py | 8 +- keras_cv/layers/preprocessing/grayscale.py | 8 +- keras_cv/layers/preprocessing/grid_mask.py | 4 +- .../layers/preprocessing/grid_mask_test.py | 8 +- .../preprocessing/jittered_resize_test.py | 8 +- keras_cv/layers/preprocessing/maybe_apply.py | 8 +- keras_cv/layers/preprocessing/mix_up.py | 16 ++- keras_cv/layers/preprocessing/mix_up_test.py | 18 ++- keras_cv/layers/preprocessing/mosaic.py | 23 +++- keras_cv/layers/preprocessing/mosaic_test.py | 18 ++- .../layers/preprocessing/posterization.py | 8 +- .../preprocessing/posterization_test.py | 8 +- 
keras_cv/layers/preprocessing/rand_augment.py | 27 +++- .../layers/preprocessing/rand_augment_test.py | 25 +++- .../preprocessing/random_aspect_ratio.py | 14 ++- .../preprocessing/random_aspect_ratio_test.py | 4 +- .../random_augmentation_pipeline_test.py | 16 ++- .../layers/preprocessing/random_brightness.py | 12 +- .../preprocessing/random_brightness_test.py | 24 +++- .../preprocessing/random_channel_shift.py | 4 +- .../random_channel_shift_test.py | 24 +++- .../preprocessing/random_choice_test.py | 8 +- .../random_color_degeneration.py | 8 +- .../preprocessing/random_color_jitter.py | 4 +- .../preprocessing/random_color_jitter_test.py | 16 ++- .../layers/preprocessing/random_contrast.py | 8 +- .../preprocessing/random_contrast_test.py | 4 +- keras_cv/layers/preprocessing/random_crop.py | 32 +++-- .../preprocessing/random_crop_and_resize.py | 28 +++-- .../random_crop_and_resize_test.py | 28 +++-- .../layers/preprocessing/random_crop_test.py | 4 +- .../preprocessing/random_cutout_test.py | 12 +- keras_cv/layers/preprocessing/random_flip.py | 20 ++- .../layers/preprocessing/random_flip_test.py | 36 ++++-- .../preprocessing/random_gaussian_blur.py | 10 +- .../random_gaussian_blur_test.py | 16 ++- keras_cv/layers/preprocessing/random_hue.py | 16 ++- .../layers/preprocessing/random_hue_test.py | 18 ++- .../preprocessing/random_jpeg_quality.py | 10 +- .../layers/preprocessing/random_rotation.py | 15 ++- .../preprocessing/random_rotation_test.py | 29 +++-- .../layers/preprocessing/random_saturation.py | 16 ++- .../preprocessing/random_saturation_test.py | 24 +++- .../layers/preprocessing/random_sharpness.py | 10 +- keras_cv/layers/preprocessing/random_shear.py | 20 ++- .../layers/preprocessing/random_shear_test.py | 26 +++- .../preprocessing/random_translation_test.py | 28 +++-- keras_cv/layers/preprocessing/random_zoom.py | 8 +- .../layers/preprocessing/random_zoom_test.py | 4 +- .../preprocessing/randomly_zoomed_crop.py | 24 +++- .../preprocessing/repeated_augmentation.py | 8 +- keras_cv/layers/preprocessing/rescaling.py | 8 +- keras_cv/layers/preprocessing/resizing.py | 16 ++- .../layers/preprocessing/resizing_test.py | 28 +++-- keras_cv/layers/preprocessing/solarization.py | 18 ++- .../layers/preprocessing/solarization_test.py | 12 +- ...ectorized_base_image_augmentation_layer.py | 16 ++- ...ized_base_image_augmentation_layer_test.py | 48 ++++++-- .../with_segmentation_masks_test.py | 18 ++- keras_cv/layers/preprocessing_3d/__init__.py | 8 +- .../base_augmentation_layer_3d.py | 4 +- .../base_augmentation_layer_3d_test.py | 8 +- .../frustum_random_dropping_points.py | 20 ++- .../frustum_random_dropping_points_test.py | 4 +- .../frustum_random_point_feature_noise.py | 10 +- ...frustum_random_point_feature_noise_test.py | 10 +- .../global_random_dropping_points.py | 4 +- .../preprocessing_3d/global_random_flip.py | 3 +- .../global_random_flip_test.py | 24 ++-- .../global_random_rotation.py | 29 ++++- .../global_random_rotation_test.py | 20 ++- .../preprocessing_3d/global_random_scaling.py | 4 +- .../global_random_scaling_test.py | 16 ++- .../global_random_translation.py | 13 +- .../global_random_translation_test.py | 16 ++- .../group_points_by_bounding_boxes.py | 22 +++- .../group_points_by_bounding_boxes_test.py | 60 +++++++-- .../preprocessing_3d/random_copy_paste.py | 49 +++++--- .../random_copy_paste_test.py | 14 ++- .../preprocessing_3d/random_drop_box.py | 4 +- .../preprocessing_3d/swap_background.py | 19 ++- .../preprocessing_3d/swap_background_test.py | 6 +- 
.../layers/regularization/dropblock_2d.py | 13 +- .../layers/regularization/squeeze_excite.py | 16 ++- .../regularization/squeeze_excite_test.py | 3 +- .../regularization/stochastic_depth_test.py | 3 +- keras_cv/layers/serialization_test.py | 4 +- keras_cv/layers/spatial_pyramid.py | 4 +- keras_cv/layers/transformer_encoder.py | 8 +- keras_cv/layers/vit_layers.py | 13 +- keras_cv/layers/vit_layers_test.py | 11 +- keras_cv/losses/focal.py | 4 +- keras_cv/losses/focal_test.py | 8 +- keras_cv/losses/giou_loss.py | 4 +- keras_cv/losses/giou_loss_test.py | 16 ++- keras_cv/losses/iou_loss_test.py | 16 ++- .../focal_loss_numerical_test.py | 11 +- .../losses/penalty_reduced_focal_loss_test.py | 20 ++- keras_cv/losses/serialization_test.py | 8 +- keras_cv/losses/simclr_loss.py | 12 +- keras_cv/losses/simclr_loss_test.py | 4 +- keras_cv/metrics/__init__.py | 4 +- keras_cv/metrics/coco/__init__.py | 4 +- .../metrics/coco/mean_average_precision.py | 60 ++++++--- .../coco/mean_average_precision_test.py | 9 +- .../numerical_tests/GenerateSamples.ipynb | 12 +- keras_cv/metrics/coco/pycoco_wrapper.py | 8 +- keras_cv/metrics/coco/recall.py | 24 +++- keras_cv/metrics/coco/recall_test.py | 32 +++-- keras_cv/metrics/coco/utils.py | 8 +- keras_cv/metrics/coco/utils_test.py | 12 +- keras_cv/models/__init__.py | 4 +- keras_cv/models/__internal__/__init__.py | 4 +- keras_cv/models/__internal__/darknet_utils.py | 18 ++- keras_cv/models/convmixer.py | 43 +++++-- keras_cv/models/convnext.py | 26 ++-- keras_cv/models/csp_darknet.py | 30 +++-- keras_cv/models/darknet.py | 26 ++-- keras_cv/models/densenet.py | 22 ++-- keras_cv/models/efficientnet_lite.py | 8 +- keras_cv/models/efficientnet_lite_test.py | 4 +- keras_cv/models/efficientnet_v1.py | 12 +- keras_cv/models/mlp_mixer.py | 14 ++- keras_cv/models/mobilenet_v3.py | 109 +++++++++++------ keras_cv/models/models_test.py | 8 +- .../models/object_detection/__internal__.py | 4 +- .../models/object_detection/__test_utils__.py | 10 +- .../models/object_detection/faster_rcnn.py | 58 ++++++--- .../object_detection/faster_rcnn_test.py | 20 ++- .../models/object_detection/predict_utils.py | 4 +- .../__internal__/layers/prediction_head.py | 4 +- .../object_detection/retina_net/retina_net.py | 28 +++-- .../retina_net/retina_net_inference_test.py | 38 ++++-- .../retina_net/retina_net_test.py | 43 +++++-- .../object_detection_3d/center_pillar.py | 9 +- .../object_detection_3d/center_pillar_test.py | 18 ++- keras_cv/models/regnet.py | 12 +- keras_cv/models/resnet_v1.py | 51 ++++++-- keras_cv/models/resnet_v2.py | 48 ++++++-- keras_cv/models/resnet_v2_test.py | 6 +- keras_cv/models/segmentation/deeplab.py | 41 +++++-- keras_cv/models/segmentation/deeplab_test.py | 23 +++- .../models/stable_diffusion/clip_tokenizer.py | 13 +- .../stable_diffusion/diffusion_model.py | 38 ++++-- .../stable_diffusion/noise_scheduler.py | 32 +++-- .../stable_diffusion/stable_diffusion.py | 37 ++++-- .../stable_diffusion/stable_diffusion_test.py | 7 +- .../models/stable_diffusion/text_encoder.py | 35 ++++-- keras_cv/models/utils_test.py | 4 +- keras_cv/models/vgg16.py | 6 +- keras_cv/models/vgg19.py | 6 +- keras_cv/models/vit.py | 8 +- keras_cv/ops/iou_3d_test.py | 3 +- keras_cv/point_cloud/point_cloud.py | 21 +++- keras_cv/point_cloud/point_cloud_test.py | 30 +++-- keras_cv/point_cloud/within_box_3d_test.py | 43 +++++-- .../ViT_weight_conversion.ipynb | 36 ++++-- .../contrastive/contrastive_trainer.py | 20 ++- .../contrastive/contrastive_trainer_test.py | 8 +- 
.../training/contrastive/simclr_trainer.py | 3 +- .../contrastive/simclr_trainer_test.py | 4 +- keras_cv/utils/__init__.py | 4 +- keras_cv/utils/conv_utils.py | 6 +- keras_cv/utils/fill_utils.py | 8 +- keras_cv/utils/fill_utils_test.py | 4 +- keras_cv/utils/preprocessing.py | 39 ++++-- keras_cv/utils/resource_loader.py | 4 +- keras_cv/utils/target_gather.py | 4 +- keras_cv/utils/train.py | 8 +- keras_cv/version_check_test.py | 3 +- setup.cfg | 4 +- shell/format.sh | 4 +- shell/lint.sh | 2 +- shell/weights/remove_top.py | 4 +- shell/weights/update_training_history.py | 22 +++- 273 files changed, 3519 insertions(+), 1168 deletions(-) diff --git a/benchmarks/vectorization_strategy_benchmark.py b/benchmarks/vectorization_strategy_benchmark.py index e94e9a1c82..888d98d353 100644 --- a/benchmarks/vectorization_strategy_benchmark.py +++ b/benchmarks/vectorization_strategy_benchmark.py @@ -72,7 +72,9 @@ def single_rectangle_mask(corners, mask_shape): return masks -def fill_single_rectangle(image, centers_x, centers_y, widths, heights, fill_values): +def fill_single_rectangle( + image, centers_x, centers_y, widths, heights, fill_values +): """Fill rectangles with fill value into images. Args: diff --git a/benchmarks/vectorized_auto_contrast.py b/benchmarks/vectorized_auto_contrast.py index acd18fce83..4b1b5d1e81 100644 --- a/benchmarks/vectorized_auto_contrast.py +++ b/benchmarks/vectorized_auto_contrast.py @@ -79,7 +79,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/benchmarks/vectorized_grayscale.py b/benchmarks/vectorized_grayscale.py index 3d3b8d3977..ffa1938744 100644 --- a/benchmarks/vectorized_grayscale.py +++ b/benchmarks/vectorized_grayscale.py @@ -90,7 +90,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/benchmarks/vectorized_random_brightness.py b/benchmarks/vectorized_random_brightness.py index 823b6fdfc9..e2b0fbcdb9 100644 --- a/benchmarks/vectorized_random_brightness.py +++ b/benchmarks/vectorized_random_brightness.py @@ -74,10 +74,14 @@ def augment_image(self, image, transformation, **kwargs): def augment_label(self, label, transformation, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask - def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + def augment_bounding_boxes( + self, bounding_boxes, transformation=None, **kwargs + ): return bounding_boxes def get_random_transformation(self, **kwargs): diff --git a/benchmarks/vectorized_random_color_jitter.py b/benchmarks/vectorized_random_color_jitter.py index 963d4dfb8e..e76dcc2fa5 100644 --- a/benchmarks/vectorized_random_color_jitter.py +++ b/benchmarks/vectorized_random_color_jitter.py @@ -140,7 +140,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, 
transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/benchmarks/vectorized_random_contrast.py b/benchmarks/vectorized_random_contrast.py index a568b03484..63696269ac 100644 --- a/benchmarks/vectorized_random_contrast.py +++ b/benchmarks/vectorized_random_contrast.py @@ -76,7 +76,9 @@ def get_random_transformation(self, **kwargs): def augment_image(self, image, transformation, **kwargs): contrast_factor = transformation - output = tf.image.adjust_contrast(image, contrast_factor=contrast_factor) + output = tf.image.adjust_contrast( + image, contrast_factor=contrast_factor + ) output = tf.clip_by_value(output, 0, 255) output.set_shape(image.shape) return output @@ -84,10 +86,14 @@ def augment_image(self, image, transformation, **kwargs): def augment_label(self, label, transformation, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask - def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + def augment_bounding_boxes( + self, bounding_boxes, transformation=None, **kwargs + ): return bounding_boxes def get_config(self): diff --git a/benchmarks/vectorized_random_hue.py b/benchmarks/vectorized_random_hue.py index 68132e7c94..b91dd410e6 100644 --- a/benchmarks/vectorized_random_hue.py +++ b/benchmarks/vectorized_random_hue.py @@ -86,7 +86,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/benchmarks/vectorized_random_saturation.py b/benchmarks/vectorized_random_saturation.py index 426d4f8382..1188caf5f4 100644 --- a/benchmarks/vectorized_random_saturation.py +++ b/benchmarks/vectorized_random_saturation.py @@ -74,15 +74,21 @@ def augment_image(self, image, transformation=None, **kwargs): # it will be handled correctly when it is a one tensor. 
transformation = tf.convert_to_tensor(transformation) adjust_factor = transformation / (1 - transformation) - return tf.image.adjust_saturation(image, saturation_factor=adjust_factor) + return tf.image.adjust_saturation( + image, saturation_factor=adjust_factor + ) - def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + def augment_bounding_boxes( + self, bounding_boxes, transformation=None, **kwargs + ): return bounding_boxes def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): @@ -96,7 +102,9 @@ def get_config(self): @classmethod def from_config(cls, config): if isinstance(config["factor"], dict): - config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + config["factor"] = tf.keras.utils.deserialize_keras_object( + config["factor"] + ) return cls(**config) diff --git a/build_deps/configure.py b/build_deps/configure.py index 35a0a0628e..d925daca99 100644 --- a/build_deps/configure.py +++ b/build_deps/configure.py @@ -158,7 +158,11 @@ def create_build_configuration(): write("build:windows --host_cxxopt=/std:" + get_cpp_version()) if is_macos() or is_linux(): - if not is_linux_ppc64le() and not is_linux_arm() and not is_linux_aarch64(): + if ( + not is_linux_ppc64le() + and not is_linux_arm() + and not is_linux_aarch64() + ): write("build --copt=-mavx") write("build --cxxopt=-std=" + get_cpp_version()) write("build --host_cxxopt=-std=" + get_cpp_version()) diff --git a/examples/layers/object_detection/anchor_generator_configuration.py b/examples/layers/object_detection/anchor_generator_configuration.py index 13f8d9f1b5..a9d1844193 100644 --- a/examples/layers/object_detection/anchor_generator_configuration.py +++ b/examples/layers/object_detection/anchor_generator_configuration.py @@ -48,5 +48,7 @@ def pair_with_anchor_boxes(inputs): if __name__ == "__main__": dataset = demo_utils.load_voc_dataset(bounding_box_format="xywh") - result = dataset.map(pair_with_anchor_boxes, num_parallel_calls=tf.data.AUTOTUNE) + result = dataset.map( + pair_with_anchor_boxes, num_parallel_calls=tf.data.AUTOTUNE + ) demo_utils.visualize_data(result, bounding_box_format="xywh") diff --git a/examples/layers/object_detection/demo_utils.py b/examples/layers/object_detection/demo_utils.py index 912bb63383..8429f56d84 100644 --- a/examples/layers/object_detection/demo_utils.py +++ b/examples/layers/object_detection/demo_utils.py @@ -29,7 +29,10 @@ def preprocess_voc(inputs, format, image_size): source="rel_yxyx", target=format, ) - return {"images": inputs["image"], "bounding_boxes": inputs["objects"]["bbox"]} + return { + "images": inputs["image"], + "bounding_boxes": inputs["objects"]["bbox"], + } def load_voc_dataset( @@ -40,7 +43,9 @@ def load_voc_dataset( ): dataset = tfds.load(name, split=tfds.Split.TRAIN, shuffle_files=True) dataset = dataset.map( - lambda x: preprocess_voc(x, format=bounding_box_format, image_size=image_size), + lambda x: preprocess_voc( + x, format=bounding_box_format, image_size=image_size + ), num_parallel_calls=tf.data.AUTOTUNE, ) dataset = dataset.padded_batch( diff --git a/examples/layers/preprocessing/bounding_box/demo_utils.py b/examples/layers/preprocessing/bounding_box/demo_utils.py index bfd032129e..4eb3c89fce 100644 --- a/examples/layers/preprocessing/bounding_box/demo_utils.py +++ 
b/examples/layers/preprocessing/bounding_box/demo_utils.py @@ -45,7 +45,9 @@ def load_voc_dataset( lambda x: preprocess_voc(x, format=bounding_box_format), num_parallel_calls=tf.data.AUTOTUNE, ) - dataset = dataset.apply(tf.data.experimental.dense_to_ragged_batch(batch_size)) + dataset = dataset.apply( + tf.data.experimental.dense_to_ragged_batch(batch_size) + ) return dataset diff --git a/examples/layers/preprocessing/bounding_box/jittered_resize_demo.py b/examples/layers/preprocessing/bounding_box/jittered_resize_demo.py index 3e7588aebe..3878252b2a 100644 --- a/examples/layers/preprocessing/bounding_box/jittered_resize_demo.py +++ b/examples/layers/preprocessing/bounding_box/jittered_resize_demo.py @@ -19,7 +19,9 @@ def main(): augment = keras_cv.layers.JitteredResize( - target_size=(640, 640), scale_factor=(0.75, 1.3), bounding_box_format="xywh" + target_size=(640, 640), + scale_factor=(0.75, 1.3), + bounding_box_format="xywh", ) dataset = demo_utils.load_voc_dataset(bounding_box_format="xywh") dataset = dataset.map( @@ -28,7 +30,8 @@ def main(): demo_utils.visualize_data(dataset, bounding_box_format="xywh") dataset = dataset.map( - lambda x: augment(x, training=False), num_parallel_calls=tf.data.AUTOTUNE + lambda x: augment(x, training=False), + num_parallel_calls=tf.data.AUTOTUNE, ) demo_utils.visualize_data(dataset, bounding_box_format="xywh") diff --git a/examples/layers/preprocessing/bounding_box/resizing_demo.py b/examples/layers/preprocessing/bounding_box/resizing_demo.py index 2a974ae931..caf7150a1d 100644 --- a/examples/layers/preprocessing/bounding_box/resizing_demo.py +++ b/examples/layers/preprocessing/bounding_box/resizing_demo.py @@ -23,7 +23,10 @@ def main(): dataset = demo_utils.load_voc_dataset(bounding_box_format="xywh") resizing = layers.Resizing( - height=300, width=400, pad_to_aspect_ratio=True, bounding_box_format="xywh" + height=300, + width=400, + pad_to_aspect_ratio=True, + bounding_box_format="xywh", ) dataset = dataset.map(resizing) demo_utils.visualize_data(dataset, bounding_box_format="xywh") diff --git a/examples/layers/preprocessing/classification/rand_augment_demo.py b/examples/layers/preprocessing/classification/rand_augment_demo.py index 38d6c3da55..133f59aaa7 100644 --- a/examples/layers/preprocessing/classification/rand_augment_demo.py +++ b/examples/layers/preprocessing/classification/rand_augment_demo.py @@ -26,7 +26,10 @@ def main(): ds = demo_utils.load_oxford_dataset() rand_augment = preprocessing.RandAugment( - value_range=(0, 255), augmentations_per_image=3, magnitude=0.5, rate=0.875 + value_range=(0, 255), + augmentations_per_image=3, + magnitude=0.5, + rate=0.875, ) ds = ds.map(rand_augment, num_parallel_calls=tf.data.AUTOTUNE) demo_utils.visualize_dataset(ds) diff --git a/examples/layers/preprocessing/classification/random_augmentation_pipeline_demo.py b/examples/layers/preprocessing/classification/random_augmentation_pipeline_demo.py index 35a67b08a9..87fb472c9e 100644 --- a/examples/layers/preprocessing/classification/random_augmentation_pipeline_demo.py +++ b/examples/layers/preprocessing/classification/random_augmentation_pipeline_demo.py @@ -27,7 +27,9 @@ def create_custom_pipeline(): layers = preprocessing.RandAugment.get_standard_policy( value_range=(0, 255), magnitude=0.75, magnitude_stddev=0.3 ) - layers = layers[:4] # slice out some layers you don't want for whatever reason + layers = layers[ + :4 + ] # slice out some layers you don't want for whatever reason layers = layers + [preprocessing.GridMask()] return 
preprocessing.RandomAugmentationPipeline( layers=layers, augmentations_per_image=3 diff --git a/examples/layers/preprocessing/classification/random_channel_shift_demo.py b/examples/layers/preprocessing/classification/random_channel_shift_demo.py index 50e291023d..34ffb33533 100644 --- a/examples/layers/preprocessing/classification/random_channel_shift_demo.py +++ b/examples/layers/preprocessing/classification/random_channel_shift_demo.py @@ -27,7 +27,9 @@ def main(): ds = demo_utils.load_oxford_dataset() - rgbshift = preprocessing.RandomChannelShift(value_range=(0, 255), factor=0.4) + rgbshift = preprocessing.RandomChannelShift( + value_range=(0, 255), factor=0.4 + ) ds = ds.map(rgbshift, num_parallel_calls=tf.data.AUTOTUNE) demo_utils.visualize_dataset(ds) diff --git a/examples/layers/preprocessing/classification/random_color_degeneration_demo.py b/examples/layers/preprocessing/classification/random_color_degeneration_demo.py index 07ea437521..aefbaa37ff 100644 --- a/examples/layers/preprocessing/classification/random_color_degeneration_demo.py +++ b/examples/layers/preprocessing/classification/random_color_degeneration_demo.py @@ -25,7 +25,9 @@ def main(): ds = demo_utils.load_oxford_dataset() - random_color_degeneration = preprocessing.RandomColorDegeneration(factor=(0, 1.0)) + random_color_degeneration = preprocessing.RandomColorDegeneration( + factor=(0, 1.0) + ) ds = ds.map(random_color_degeneration, num_parallel_calls=tf.data.AUTOTUNE) demo_utils.visualize_dataset(ds) diff --git a/examples/layers/preprocessing/classification/random_hue_demo.py b/examples/layers/preprocessing/classification/random_hue_demo.py index 926147def7..6cbb99a262 100644 --- a/examples/layers/preprocessing/classification/random_hue_demo.py +++ b/examples/layers/preprocessing/classification/random_hue_demo.py @@ -24,7 +24,9 @@ def main(): ds = demo_utils.load_oxford_dataset() - random_hue = preprocessing.RandomHue(factor=(0.0, 1.0), value_range=(0, 255)) + random_hue = preprocessing.RandomHue( + factor=(0.0, 1.0), value_range=(0, 255) + ) ds = ds.map(random_hue, num_parallel_calls=tf.data.AUTOTUNE) demo_utils.visualize_dataset(ds) diff --git a/examples/models/generative/stable_diffusion/text_to_image.py b/examples/models/generative/stable_diffusion/text_to_image.py index 4c961850bc..e95fde98ec 100644 --- a/examples/models/generative/stable_diffusion/text_to_image.py +++ b/examples/models/generative/stable_diffusion/text_to_image.py @@ -11,6 +11,8 @@ from keras_cv.models import StableDiffusion model = StableDiffusion(img_height=512, img_width=512, jit_compile=True) -img = model.text_to_image("Photograph of a beautiful horse running through a field") +img = model.text_to_image( + "Photograph of a beautiful horse running through a field" +) Image.fromarray(img[0]).save("horse.png") print("Saved at horse.png") diff --git a/examples/training/classification/imagenet/basic_training.py b/examples/training/classification/imagenet/basic_training.py index 7b512865a4..2e84a829f7 100644 --- a/examples/training/classification/imagenet/basic_training.py +++ b/examples/training/classification/imagenet/basic_training.py @@ -52,15 +52,21 @@ flags.DEFINE_string( "model_name", None, "The name of the model in KerasCV.models to use." ) -flags.DEFINE_string("imagenet_path", None, "Directory from which to load Imagenet.") +flags.DEFINE_string( + "imagenet_path", None, "Directory from which to load Imagenet." +) flags.DEFINE_string( "backup_path", None, "Directory which will be used for training backups." 
) flags.DEFINE_string( - "weights_path", None, "Directory which will be used to store weight checkpoints." + "weights_path", + None, + "Directory which will be used to store weight checkpoints.", ) flags.DEFINE_string( - "tensorboard_path", None, "Directory which will be used to store tensorboard logs." + "tensorboard_path", + None, + "Directory which will be used to store tensorboard logs.", ) flags.DEFINE_integer( "batch_size", @@ -150,7 +156,9 @@ print("Number of accelerators: ", strategy.num_replicas_in_sync) BATCH_SIZE = FLAGS.batch_size * strategy.num_replicas_in_sync -INITIAL_LEARNING_RATE = FLAGS.initial_learning_rate * strategy.num_replicas_in_sync +INITIAL_LEARNING_RATE = ( + FLAGS.initial_learning_rate * strategy.num_replicas_in_sync +) """TFRecord-based tf.data.Dataset loads lazily so we can't get the length of the dataset. Temporary.""" NUM_IMAGES = 1281167 @@ -245,7 +253,12 @@ def augment(img, label): def lr_warmup_cosine_decay( - global_step, warmup_steps, hold=0, total_steps=0, start_lr=0.0, target_lr=1e-2 + global_step, + warmup_steps, + hold=0, + total_steps=0, + start_lr=0.0, + target_lr=1e-2, ): # Cosine decay learning_rate = ( @@ -269,7 +282,9 @@ def lr_warmup_cosine_decay( global_step > warmup_steps + hold, learning_rate, target_lr ) - learning_rate = tf.where(global_step < warmup_steps, warmup_lr, learning_rate) + learning_rate = tf.where( + global_step < warmup_steps, warmup_lr, learning_rate + ) return learning_rate @@ -292,7 +307,9 @@ def lr_warmup_cosine_decay( class WarmUpCosineDecay(keras.optimizers.schedules.LearningRateSchedule): - def __init__(self, warmup_steps, total_steps, hold, start_lr=0.0, target_lr=1e-2): + def __init__( + self, warmup_steps, total_steps, hold, start_lr=0.0, target_lr=1e-2 + ): super().__init__() self.start_lr = start_lr self.target_lr = target_lr @@ -371,13 +388,19 @@ def __call__(self, step): callbacks.ModelCheckpoint( FLAGS.weights_path, save_weights_only=True, save_best_only=True ), - callbacks.TensorBoard(log_dir=FLAGS.tensorboard_path, write_steps_per_second=True), + callbacks.TensorBoard( + log_dir=FLAGS.tensorboard_path, write_steps_per_second=True + ), ] if FLAGS.learning_rate_schedule == REDUCE_ON_PLATEAU: model_callbacks.append( callbacks.ReduceLROnPlateau( - monitor="val_loss", factor=0.1, patience=10, min_delta=0.001, min_lr=0.0001 + monitor="val_loss", + factor=0.1, + patience=10, + min_delta=0.001, + min_lr=0.0001, ) ) diff --git a/examples/training/contrastive/imagenet/simclr_training.py b/examples/training/contrastive/imagenet/simclr_training.py index 400ceae133..66779719ec 100644 --- a/examples/training/contrastive/imagenet/simclr_training.py +++ b/examples/training/contrastive/imagenet/simclr_training.py @@ -30,17 +30,25 @@ flags.DEFINE_string( "model_name", None, "The name of the model in KerasCV.models to use." ) -flags.DEFINE_string("imagenet_path", None, "Directory from which to load Imagenet.") +flags.DEFINE_string( + "imagenet_path", None, "Directory from which to load Imagenet." +) flags.DEFINE_string( "backup_path", None, "Directory which will be used for training backups." ) flags.DEFINE_string( - "weights_path", None, "Directory which will be used to store weight checkpoints." + "weights_path", + None, + "Directory which will be used to store weight checkpoints.", ) flags.DEFINE_string( - "tensorboard_path", None, "Directory which will be used to store tensorboard logs." 
+ "tensorboard_path", + None, + "Directory which will be used to store tensorboard logs.", +) +flags.DEFINE_integer( + "batch_size", 256, "Batch size for training and evaluation." ) -flags.DEFINE_integer("batch_size", 256, "Batch size for training and evaluation.") flags.DEFINE_boolean( "use_xla", True, "Whether or not to use XLA (jit_compile) for training." ) @@ -98,7 +106,9 @@ ) optimizer = optimizers.SGD( - learning_rate=FLAGS.initial_learning_rate, momentum=0.9, global_clipnorm=10 + learning_rate=FLAGS.initial_learning_rate, + momentum=0.9, + global_clipnorm=10, ) loss_fn = losses.SimCLRLoss(temperature=0.5, reduction="none") probe_loss = keras.losses.CategoricalCrossentropy( diff --git a/examples/training/object_detection/pascal_voc/faster_rcnn.py b/examples/training/object_detection/pascal_voc/faster_rcnn.py index ac7da5417a..e79e7f2c61 100644 --- a/examples/training/object_detection/pascal_voc/faster_rcnn.py +++ b/examples/training/object_detection/pascal_voc/faster_rcnn.py @@ -59,7 +59,12 @@ "voc/2007", split="train+validation", with_info=False, shuffle_files=True ) train_ds = train_ds.concatenate( - tfds.load("voc/2012", split="train+validation", with_info=False, shuffle_files=True) + tfds.load( + "voc/2012", + split="train+validation", + with_info=False, + shuffle_files=True, + ) ) eval_ds = tfds.load("voc/2007", split="test", with_info=False) @@ -82,7 +87,8 @@ ] ] backbone = tf.keras.Model( - inputs=inputs, outputs={2: c2_output, 3: c3_output, 4: c4_output, 5: c5_output} + inputs=inputs, + outputs={2: c2_output, 3: c3_output, 4: c4_output, 5: c5_output}, ) # keras_cv backbone gives 2mAP lower result. # TODO(ian): should eventually use keras_cv backbone. @@ -204,7 +210,9 @@ def get_non_empty_box_indices(boxes): # Selects indices if box height or width is 0. 
height = boxes[:, 2] - boxes[:, 0] width = boxes[:, 3] - boxes[:, 1] - indices = tf.where(tf.logical_and(tf.greater(height, 0), tf.greater(width, 0))) + indices = tf.where( + tf.logical_and(tf.greater(height, 0), tf.greater(width, 0)) + ) return indices[:, 0] @@ -257,8 +265,12 @@ def apply(inputs): def pad_fn(examples): gt_boxes = examples.pop("gt_boxes") gt_classes = examples.pop("gt_classes") - gt_boxes = gt_boxes.to_tensor(default_value=-1.0, shape=[global_batch, 32, 4]) - gt_classes = gt_classes.to_tensor(default_value=-1.0, shape=[global_batch, 32]) + gt_boxes = gt_boxes.to_tensor( + default_value=-1.0, shape=[global_batch, 32, 4] + ) + gt_classes = gt_classes.to_tensor( + default_value=-1.0, shape=[global_batch, 32] + ) return examples["images"], { "boxes": gt_boxes, "classes": gt_classes, @@ -270,7 +282,9 @@ def pad_fn(examples): num_parallel_calls=tf.data.AUTOTUNE, ) train_ds = train_ds.apply( - tf.data.experimental.dense_to_ragged_batch(global_batch, drop_remainder=True) + tf.data.experimental.dense_to_ragged_batch( + global_batch, drop_remainder=True + ) ) train_ds = train_ds.map(pad_fn, num_parallel_calls=tf.data.AUTOTUNE) train_ds = train_ds.shuffle(8) @@ -281,7 +295,9 @@ def pad_fn(examples): num_parallel_calls=tf.data.AUTOTUNE, ) eval_ds = eval_ds.apply( - tf.data.experimental.dense_to_ragged_batch(global_batch, drop_remainder=True) + tf.data.experimental.dense_to_ragged_batch( + global_batch, drop_remainder=True + ) ) eval_ds = eval_ds.map(pad_fn, num_parallel_calls=tf.data.AUTOTUNE) eval_ds = eval_ds.prefetch(2) @@ -301,7 +317,9 @@ def pad_fn(examples): step = 0 callbacks = [ - tf.keras.callbacks.ModelCheckpoint(FLAGS.weights_path, save_weights_only=True), + tf.keras.callbacks.ModelCheckpoint( + FLAGS.weights_path, save_weights_only=True + ), tf.keras.callbacks.TensorBoard( log_dir=FLAGS.tensorboard_path, write_steps_per_second=True ), diff --git a/examples/training/object_detection/pascal_voc/retina_net.py b/examples/training/object_detection/pascal_voc/retina_net.py index ef2c0e378d..cab44434b1 100644 --- a/examples/training/object_detection/pascal_voc/retina_net.py +++ b/examples/training/object_detection/pascal_voc/retina_net.py @@ -70,7 +70,12 @@ "voc/2007", split="train+validation", with_info=False, shuffle_files=True ) train_ds = train_ds.concatenate( - tfds.load("voc/2012", split="train+validation", with_info=False, shuffle_files=True) + tfds.load( + "voc/2012", + split="train+validation", + with_info=False, + shuffle_files=True, + ) ) eval_ds = tfds.load("voc/2007", split="test", with_info=False) @@ -185,7 +190,9 @@ def get_non_empty_box_indices(boxes): # Selects indices if box height or width is 0. 
height = boxes[:, 2] - boxes[:, 0] width = boxes[:, 3] - boxes[:, 1] - indices = tf.where(tf.logical_and(tf.greater(height, 0), tf.greater(width, 0))) + indices = tf.where( + tf.logical_and(tf.greater(height, 0), tf.greater(width, 0)) + ) return indices[:, 0] @@ -293,7 +300,9 @@ def pad_fn(images, bounding_boxes): ) train_ds = train_ds.apply( - tf.data.experimental.dense_to_ragged_batch(GLOBAL_BATCH_SIZE, drop_remainder=True) + tf.data.experimental.dense_to_ragged_batch( + GLOBAL_BATCH_SIZE, drop_remainder=True + ) ) train_ds = train_ds.map(pad_fn, num_parallel_calls=tf.data.AUTOTUNE) train_ds = train_ds.shuffle(8 * strategy.num_replicas_in_sync) @@ -304,7 +313,9 @@ def pad_fn(images, bounding_boxes): num_parallel_calls=tf.data.AUTOTUNE, ) eval_ds = eval_ds.apply( - tf.data.experimental.dense_to_ragged_batch(GLOBAL_BATCH_SIZE, drop_remainder=True) + tf.data.experimental.dense_to_ragged_batch( + GLOBAL_BATCH_SIZE, drop_remainder=True + ) ) eval_ds = eval_ds.map(pad_fn, num_parallel_calls=tf.data.AUTOTUNE) eval_ds = eval_ds.prefetch(tf.data.AUTOTUNE) @@ -329,9 +340,15 @@ def pad_fn(images, bounding_boxes): c3_output, c4_output, c5_output = [ backbone.get_layer(layer_name).output - for layer_name in ["conv3_block4_out", "conv4_block6_out", "conv5_block3_out"] + for layer_name in [ + "conv3_block4_out", + "conv4_block6_out", + "conv5_block3_out", + ] ] - backbone = keras.Model(inputs=inputs, outputs=[c3_output, c4_output, c5_output]) + backbone = keras.Model( + inputs=inputs, outputs=[c3_output, c4_output, c5_output] + ) # keras_cv backbone gives 4mAP lower result. # TODO(ian): should eventually use keras_cv backbone. # backbone = keras_cv.models.ResNet50( diff --git a/examples/training/object_detection_3d/waymo/serialize_records.py b/examples/training/object_detection_3d/waymo/serialize_records.py index d7aea18e1f..08f1e69f16 100644 --- a/examples/training/object_detection_3d/waymo/serialize_records.py +++ b/examples/training/object_detection_3d/waymo/serialize_records.py @@ -19,12 +19,8 @@ from keras_cv.datasets.waymo import build_tensors_for_augmentation from keras_cv.datasets.waymo import load -TRAINING_RECORD_PATH = ( - "./wod_records" # "gs://waymo_open_dataset_v_1_0_0_individual_files/training" -) -TRANSFORMED_RECORD_PATH = ( - "./wod_transformed" # "gs://waymo_open_dataset_v_1_0_0_individual_files/training" -) +TRAINING_RECORD_PATH = "./wod_records" # "gs://waymo_open_dataset_v_1_0_0_individual_files/training" +TRANSFORMED_RECORD_PATH = "./wod_transformed" # "gs://waymo_open_dataset_v_1_0_0_individual_files/training" def _float_feature(value): @@ -44,7 +40,9 @@ def serialize_example(feature0, feature1): } # Create a Features message using tf.train.Example. 
- example_proto = tf.train.Example(features=tf.train.Features(feature=feature)) + example_proto = tf.train.Example( + features=tf.train.Features(feature=feature) + ) return example_proto.SerializeToString() diff --git a/examples/training/object_detection_3d/waymo/train_pillars.py b/examples/training/object_detection_3d/waymo/train_pillars.py index cfae191247..1bd3b14f4e 100644 --- a/examples/training/object_detection_3d/waymo/train_pillars.py +++ b/examples/training/object_detection_3d/waymo/train_pillars.py @@ -19,9 +19,7 @@ from keras_cv.layers import preprocessing3d # use serialize_records to convert WOD frame to Tensors -TRAINING_RECORD_PATH = ( - "./wod_transformed" # "gs://waymo_open_dataset_v_1_0_0_individual_files/training" -) +TRAINING_RECORD_PATH = "./wod_transformed" # "gs://waymo_open_dataset_v_1_0_0_individual_files/training" global_batch = 1 @@ -52,18 +50,24 @@ def pad_tensors(x): point_clouds = point_clouds.to_tensor( default_value=-1.0, shape=[global_batch, 1, 200000, 8] ) - boxes = boxes.to_tensor(default_value=-1.0, shape=[global_batch, 1, 1000, 11]) + boxes = boxes.to_tensor( + default_value=-1.0, shape=[global_batch, 1, 1000, 11] + ) res["point_clouds"] = point_clouds res["bounding_boxes"] = boxes return res # Load the training dataset -filenames = tf.data.Dataset.list_files(os.path.join(TRAINING_RECORD_PATH, "*.tfrecord")) +filenames = tf.data.Dataset.list_files( + os.path.join(TRAINING_RECORD_PATH, "*.tfrecord") +) train_ds = tf.data.TFRecordDataset(filenames) train_ds = train_ds.map(build_tensors, num_parallel_calls=tf.data.AUTOTUNE) train_ds = train_ds.apply( - tf.data.experimental.dense_to_ragged_batch(global_batch, drop_remainder=True) + tf.data.experimental.dense_to_ragged_batch( + global_batch, drop_remainder=True + ) ) # Batch by 1 to add a dimension for `num_frames` train_ds = train_ds.map(pad_tensors, num_parallel_calls=tf.data.AUTOTUNE) diff --git a/examples/training/semantic_segmentation/pascal_voc/basic_training.py b/examples/training/semantic_segmentation/pascal_voc/basic_training.py index 2d515dcc10..0a4b7f190c 100644 --- a/examples/training/semantic_segmentation/pascal_voc/basic_training.py +++ b/examples/training/semantic_segmentation/pascal_voc/basic_training.py @@ -181,4 +181,6 @@ def proc_eval_fn(examples): ] model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics) -model.fit(train_ds, epochs=FLAGS.epochs, validation_data=eval_ds, callbacks=callbacks) +model.fit( + train_ds, epochs=FLAGS.epochs, validation_data=eval_ds, callbacks=callbacks +) diff --git a/keras_cv/benchmarks/classification_training_benchmark_test.py b/keras_cv/benchmarks/classification_training_benchmark_test.py index a67000f778..9c51ebe2e3 100644 --- a/keras_cv/benchmarks/classification_training_benchmark_test.py +++ b/keras_cv/benchmarks/classification_training_benchmark_test.py @@ -70,7 +70,9 @@ def _run_benchmark(self, app, strategy): include_rescaling=True, ) model.compile( - optimizer=tf.keras.optimizers.SGD(learning_rate=0.1, momentum=0.9), + optimizer=tf.keras.optimizers.SGD( + learning_rate=0.1, momentum=0.9 + ), loss="categorical_crossentropy", metrics=["accuracy"], ) @@ -89,10 +91,15 @@ def _run_benchmark(self, app, strategy): metrics = [] metrics.append({"name": "compile_time", "value": compile_time}) - metrics.append({"name": "avg_epoch_time", "value": training_time / self.epochs}) + metrics.append( + {"name": "avg_epoch_time", "value": training_time / self.epochs} + ) metrics.append({"name": "epochs", "value": self.epochs}) metrics.append( - {"name": 
"accuracy", "value": training_results.history["accuracy"][0]} + { + "name": "accuracy", + "value": training_results.history["accuracy"][0], + } ) self.report_benchmark(wall_time=total_time, metrics=metrics) diff --git a/keras_cv/bounding_box/__init__.py b/keras_cv/bounding_box/__init__.py index 92f7169665..ce7a03a93c 100644 --- a/keras_cv/bounding_box/__init__.py +++ b/keras_cv/bounding_box/__init__.py @@ -22,7 +22,9 @@ from keras_cv.bounding_box.formats import XYXY from keras_cv.bounding_box.formats import YXYX from keras_cv.bounding_box.iou import compute_iou -from keras_cv.bounding_box.mask_invalid_detections import mask_invalid_detections +from keras_cv.bounding_box.mask_invalid_detections import ( + mask_invalid_detections, +) from keras_cv.bounding_box.to_dense import to_dense from keras_cv.bounding_box.to_ragged import to_ragged from keras_cv.bounding_box.utils import as_relative diff --git a/keras_cv/bounding_box/converters.py b/keras_cv/bounding_box/converters.py index 978d57cda0..ffbd7d33e4 100644 --- a/keras_cv/bounding_box/converters.py +++ b/keras_cv/bounding_box/converters.py @@ -54,7 +54,9 @@ def _encode_box_to_deltas( source=box_format, target="center_yxhw", ) - anchor_dimensions = tf.maximum(encoded_anchors[..., 2:], tf.keras.backend.epsilon()) + anchor_dimensions = tf.maximum( + encoded_anchors[..., 2:], tf.keras.backend.epsilon() + ) box_dimensions = tf.maximum(boxes[..., 2:], tf.keras.backend.epsilon()) # anchors be unbatched, boxes can either be batched or unbatched. boxes_delta = tf.concat( @@ -97,7 +99,8 @@ def decode_single_level(anchor, box_delta): # anchors be unbatched, boxes can either be batched or unbatched. box = tf.concat( [ - box_delta[..., :2] * encoded_anchor[..., 2:] + encoded_anchor[..., :2], + box_delta[..., :2] * encoded_anchor[..., 2:] + + encoded_anchor[..., :2], tf.math.exp(box_delta[..., 2:]) * encoded_anchor[..., 2:], ], axis=-1, @@ -138,7 +141,12 @@ def _xywh_to_xyxy(boxes, images=None, image_shape=None): def _xyxy_to_center_yxhw(boxes, images=None, image_shape=None): left, top, right, bottom = tf.split(boxes, ALL_AXES, axis=-1) return tf.concat( - [(top + bottom) / 2.0, (left + right) / 2.0, bottom - top, right - left], + [ + (top + bottom) / 2.0, + (left + right) / 2.0, + bottom - top, + right - left, + ], axis=-1, ) @@ -186,7 +194,12 @@ def _xyxy_to_rel_xywh(boxes, images=None, image_shape=None): def _xyxy_to_center_xywh(boxes, images=None, image_shape=None): left, top, right, bottom = tf.split(boxes, ALL_AXES, axis=-1) return tf.concat( - [(left + right) / 2.0, (top + bottom) / 2.0, right - left, bottom - top], + [ + (left + right) / 2.0, + (top + bottom) / 2.0, + right - left, + bottom - top, + ], axis=-1, ) @@ -494,7 +507,9 @@ def _image_shape(images, image_shape, boxes): height, width = image_shape[1], image_shape[2] else: height = tf.reshape(images.row_lengths(), (-1, 1)) - width = tf.reshape(tf.reduce_max(images.row_lengths(axis=2), 1), (-1, 1)) + width = tf.reshape( + tf.reduce_max(images.row_lengths(axis=2), 1), (-1, 1) + ) if isinstance(boxes, tf.RaggedTensor): height = tf.expand_dims(height, axis=-1) width = tf.expand_dims(width, axis=-1) diff --git a/keras_cv/bounding_box/converters_test.py b/keras_cv/bounding_box/converters_test.py index 73e71600ab..d16096f948 100644 --- a/keras_cv/bounding_box/converters_test.py +++ b/keras_cv/bounding_box/converters_test.py @@ -20,8 +20,12 @@ from keras_cv import bounding_box -xyxy_box = tf.constant([[[10, 20, 110, 120], [20, 30, 120, 130]]], dtype=tf.float32) -yxyx_box = tf.constant([[[20, 10, 
120, 110], [30, 20, 130, 120]]], dtype=tf.float32) +xyxy_box = tf.constant( + [[[10, 20, 110, 120], [20, 30, 120, 130]]], dtype=tf.float32 +) +yxyx_box = tf.constant( + [[[20, 10, 120, 110], [30, 20, 130, 120]]], dtype=tf.float32 +) rel_xyxy_box = tf.constant( [[[0.01, 0.02, 0.11, 0.12], [0.02, 0.03, 0.12, 0.13]]], dtype=tf.float32 ) @@ -37,7 +41,9 @@ center_xywh_box = tf.constant( [[[60, 70, 100, 100], [70, 80, 100, 100]]], dtype=tf.float32 ) -xywh_box = tf.constant([[[10, 20, 100, 100], [20, 30, 100, 100]]], dtype=tf.float32) +xywh_box = tf.constant( + [[[10, 20, 100, 100], [20, 30, 100, 100]]], dtype=tf.float32 +) rel_xywh_box = tf.constant( [[[0.01, 0.02, 0.1, 0.1], [0.02, 0.03, 0.1, 0.1]]], dtype=tf.float32 ) @@ -79,7 +85,9 @@ test_image_ragged = [ (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations(boxes_ragged_images.keys(), 2) + for (source, target) in itertools.permutations( + boxes_ragged_images.keys(), 2 + ) ] + [("xyxy_xyxy", "xyxy", "xyxy")] @@ -130,7 +138,9 @@ def test_without_images(self): source_box = boxes["xyxy"] target_box = boxes["xywh"] self.assertAllClose( - bounding_box.convert_format(source_box, source="xyxy", target="xywh"), + bounding_box.convert_format( + source_box, source="xyxy", target="xywh" + ), target_box, ) @@ -172,7 +182,10 @@ def test_ragged_bounding_box_with_image_shape(self, source, target): target_box = _raggify(boxes[target]) self.assertAllClose( bounding_box.convert_format( - source_box, source=source, target=target, image_shape=(1000, 1000, 3) + source_box, + source=source, + target=target, + image_shape=(1000, 1000, 3), ), target_box, ) diff --git a/keras_cv/bounding_box/iou.py b/keras_cv/bounding_box/iou.py index d492f44da9..34bc292132 100644 --- a/keras_cv/bounding_box/iou.py +++ b/keras_cv/bounding_box/iou.py @@ -26,7 +26,9 @@ def _compute_area(box): Returns: a float Tensor of [N] or [batch_size, N] """ - y_min, x_min, y_max, x_max = tf.split(box[..., :4], num_or_size_splits=4, axis=-1) + y_min, x_min, y_max, x_max = tf.split( + box[..., :4], num_or_size_splits=4, axis=-1 + ) return tf.squeeze((y_max - y_min) * (x_max - x_min), axis=-1) @@ -148,6 +150,8 @@ def compute_iou( mask_val_t = tf.cast(mask_val, res.dtype) * tf.ones_like(res) boxes1_mask = tf.less(tf.reduce_max(boxes1, axis=-1, keepdims=True), 0.0) boxes2_mask = tf.less(tf.reduce_max(boxes2, axis=-1, keepdims=True), 0.0) - background_mask = tf.logical_or(boxes1_mask, tf.transpose(boxes2_mask, perm)) + background_mask = tf.logical_or( + boxes1_mask, tf.transpose(boxes2_mask, perm) + ) iou_lookup_table = tf.where(background_mask, mask_val_t, res) return iou_lookup_table diff --git a/keras_cv/bounding_box/iou_test.py b/keras_cv/bounding_box/iou_test.py index ee398d6563..923ec364e4 100644 --- a/keras_cv/bounding_box/iou_test.py +++ b/keras_cv/bounding_box/iou_test.py @@ -81,8 +81,16 @@ def test_batched_compute_iou(self): ) sample_y_pred = tf.constant( [ - [bb1_off_by_1_pred, top_left_bounding_box, another_far_away_pred], - [bb1_off_by_1_pred, top_left_bounding_box, another_far_away_pred], + [ + bb1_off_by_1_pred, + top_left_bounding_box, + another_far_away_pred, + ], + [ + bb1_off_by_1_pred, + top_left_bounding_box, + another_far_away_pred, + ], ], dtype=tf.float32, ) @@ -147,8 +155,16 @@ def test_unbatched_boxes1_batched_boxes2(self): ) sample_y_pred = tf.constant( [ - [bb1_off_by_1_pred, top_left_bounding_box, another_far_away_pred], - [bb1_off_by_1_pred, top_left_bounding_box, another_far_away_pred], + [ + bb1_off_by_1_pred, + top_left_bounding_box, + 
another_far_away_pred, + ], + [ + bb1_off_by_1_pred, + top_left_bounding_box, + another_far_away_pred, + ], ], dtype=tf.float32, ) diff --git a/keras_cv/bounding_box/mask_invalid_detections_test.py b/keras_cv/bounding_box/mask_invalid_detections_test.py index 0db43557c4..abd6b79408 100644 --- a/keras_cv/bounding_box/mask_invalid_detections_test.py +++ b/keras_cv/bounding_box/mask_invalid_detections_test.py @@ -28,13 +28,17 @@ def test_correctly_masks_based_on_max_dets(self): result = bounding_box.mask_invalid_detections(bounding_boxes) negative_one_boxes = result["boxes"][:, 5:, :] - self.assertAllClose(negative_one_boxes, -tf.ones_like(negative_one_boxes)) + self.assertAllClose( + negative_one_boxes, -tf.ones_like(negative_one_boxes) + ) preserved_boxes = result["boxes"][:, :2, :] self.assertAllClose(preserved_boxes, bounding_boxes["boxes"][:, :2, :]) boxes_from_image_3 = result["boxes"][2, :4, :] - self.assertAllClose(boxes_from_image_3, bounding_boxes["boxes"][2, :4, :]) + self.assertAllClose( + boxes_from_image_3, bounding_boxes["boxes"][2, :4, :] + ) def test_correctly_masks_based_on_max_dets_in_graph(self): bounding_boxes = { @@ -50,19 +54,27 @@ def apply_mask_detections(bounding_boxes): result = apply_mask_detections(bounding_boxes) negative_one_boxes = result["boxes"][:, 5:, :] - self.assertAllClose(negative_one_boxes, -tf.ones_like(negative_one_boxes)) + self.assertAllClose( + negative_one_boxes, -tf.ones_like(negative_one_boxes) + ) preserved_boxes = result["boxes"][:, :2, :] self.assertAllClose(preserved_boxes, bounding_boxes["boxes"][:, :2, :]) boxes_from_image_3 = result["boxes"][2, :4, :] - self.assertAllClose(boxes_from_image_3, bounding_boxes["boxes"][2, :4, :]) + self.assertAllClose( + boxes_from_image_3, bounding_boxes["boxes"][2, :4, :] + ) def test_ragged_outputs(self): bounding_boxes = { - "boxes": tf.stack([tf.random.uniform((10, 4)), tf.random.uniform((10, 4))]), + "boxes": tf.stack( + [tf.random.uniform((10, 4)), tf.random.uniform((10, 4))] + ), "num_detections": tf.constant([2, 3]), - "classes": tf.stack([tf.random.uniform((10,)), tf.random.uniform((10,))]), + "classes": tf.stack( + [tf.random.uniform((10,)), tf.random.uniform((10,))] + ), } result = bounding_box.mask_invalid_detections( diff --git a/keras_cv/bounding_box/to_dense.py b/keras_cv/bounding_box/to_dense.py index d21d23cb57..01de501e6b 100644 --- a/keras_cv/bounding_box/to_dense.py +++ b/keras_cv/bounding_box/to_dense.py @@ -70,10 +70,14 @@ def to_dense(bounding_boxes, max_boxes=None, default_value=-1): if "confidence" in bounding_boxes: if isinstance(bounding_boxes["confidence"], tf.RaggedTensor): - bounding_boxes["confidence"] = bounding_boxes["confidence"].to_tensor( + bounding_boxes["confidence"] = bounding_boxes[ + "confidence" + ].to_tensor( default_value=default_value, shape=_classes_shape( - info["is_batched"], bounding_boxes["confidence"].shape, max_boxes + info["is_batched"], + bounding_boxes["confidence"].shape, + max_boxes, ), ) diff --git a/keras_cv/bounding_box/to_ragged_test.py b/keras_cv/bounding_box/to_ragged_test.py index cd043b9d4b..815f99efe0 100644 --- a/keras_cv/bounding_box/to_ragged_test.py +++ b/keras_cv/bounding_box/to_ragged_test.py @@ -34,7 +34,10 @@ def test_converts_to_ragged(self): def test_round_trip(self): original = { "boxes": tf.constant( - [[[0, 0, 0, 0], [-1, -1, -1, -1]], [[-1, -1, -1, -1], [-1, -1, -1, -1]]] + [ + [[0, 0, 0, 0], [-1, -1, -1, -1]], + [[-1, -1, -1, -1], [-1, -1, -1, -1]], + ] ), "classes": tf.constant([[1, -1], [-1, -1]]), } diff --git 
a/keras_cv/bounding_box/utils.py b/keras_cv/bounding_box/utils.py index 609a58e753..24750af47b 100644 --- a/keras_cv/bounding_box/utils.py +++ b/keras_cv/bounding_box/utils.py @@ -21,7 +21,10 @@ def is_relative(bounding_box_format): """A util to check if a bounding box format uses relative coordinates""" - if bounding_box_format.lower() not in bounding_box.converters.TO_XYXY_CONVERTERS: + if ( + bounding_box_format.lower() + not in bounding_box.converters.TO_XYXY_CONVERTERS + ): raise ValueError( "`is_relative()` received an unsupported format for the argument " f"`bounding_box_format`. `bounding_box_format` should be one of " @@ -54,12 +57,16 @@ def _relative_area(boxes, bounding_box_format): widths = boxes[..., XYWH.WIDTH] heights = boxes[..., XYWH.HEIGHT] # handle corner case where shear performs a full inversion. - return tf.where(tf.math.logical_and(widths > 0, heights > 0), widths * heights, 0.0) + return tf.where( + tf.math.logical_and(widths > 0, heights > 0), widths * heights, 0.0 + ) # bounding_boxes is a dictionary with shape: # {"boxes": [None, None, 4], "mask": [None, None]} -def clip_to_image(bounding_boxes, bounding_box_format, images=None, image_shape=None): +def clip_to_image( + bounding_boxes, bounding_box_format, images=None, image_shape=None +): """clips bounding boxes to image boundaries. `clip_to_image()` clips bounding boxes that have coordinates out of bounds of an @@ -94,7 +101,9 @@ def clip_to_image(bounding_boxes, bounding_box_format, images=None, image_shape= ], axis=-1, ) - areas = _relative_area(clipped_bounding_boxes, bounding_box_format="rel_xyxy") + areas = _relative_area( + clipped_bounding_boxes, bounding_box_format="rel_xyxy" + ) clipped_bounding_boxes = bounding_box.convert_format( clipped_bounding_boxes, source="rel_xyxy", @@ -106,7 +115,9 @@ def clip_to_image(bounding_boxes, bounding_box_format, images=None, image_shape= tf.expand_dims(areas > 0.0, axis=-1), clipped_bounding_boxes, -1.0 ) classes = tf.where(areas > 0.0, classes, tf.constant(-1, classes.dtype)) - nan_indices = tf.math.reduce_any(tf.math.is_nan(clipped_bounding_boxes), axis=-1) + nan_indices = tf.math.reduce_any( + tf.math.is_nan(clipped_bounding_boxes), axis=-1 + ) classes = tf.where(nan_indices, tf.constant(-1, classes.dtype), classes) # TODO update dict and return diff --git a/keras_cv/bounding_box/utils_test.py b/keras_cv/bounding_box/utils_test.py index 3258ab66e8..471c897c14 100644 --- a/keras_cv/bounding_box/utils_test.py +++ b/keras_cv/bounding_box/utils_test.py @@ -60,7 +60,9 @@ def test_clip_to_image_filters_fully_out_bounding_boxes(self): height = 256 width = 256 bounding_boxes = { - "boxes": tf.convert_to_tensor([[257, 257, 400, 400], [100, 100, 300, 300]]), + "boxes": tf.convert_to_tensor( + [[257, 257, 400, 400], [100, 100, 300, 300]] + ), "classes": tf.convert_to_tensor([0, 0]), } image = tf.ones(shape=(height, width, 3)) @@ -82,7 +84,9 @@ def test_clip_to_image_filters_fully_out_bounding_boxes_negative_area(self): height = 256 width = 256 bounding_boxes = { - "boxes": tf.convert_to_tensor([[110, 120, 100, 100], [100, 100, 300, 300]]), + "boxes": tf.convert_to_tensor( + [[110, 120, 100, 100], [100, 100, 300, 300]] + ), "classes": [0, 0], } image = tf.ones(shape=(height, width, 3)) diff --git a/keras_cv/bounding_box/validate_format.py b/keras_cv/bounding_box/validate_format.py index 2032a2a2a8..b260dd554c 100644 --- a/keras_cv/bounding_box/validate_format.py +++ b/keras_cv/bounding_box/validate_format.py @@ -69,7 +69,9 @@ def validate_format(bounding_boxes): 
info["classes_one_hot"] = len(classes.shape) == 3 - if isinstance(boxes, tf.RaggedTensor) != isinstance(classes, tf.RaggedTensor): + if isinstance(boxes, tf.RaggedTensor) != isinstance( + classes, tf.RaggedTensor + ): raise ValueError( "Either both `boxes` and `classes` " "should be Ragged, or neither should be ragged." diff --git a/keras_cv/bounding_box/validate_format_test.py b/keras_cv/bounding_box/validate_format_test.py index 3889a1f484..69ae95ab7f 100644 --- a/keras_cv/bounding_box/validate_format_test.py +++ b/keras_cv/bounding_box/validate_format_test.py @@ -25,7 +25,8 @@ def test_raises_nondict(self): def test_mismatch_dimensions(self): with self.assertRaisesRegex( - ValueError, "Expected `boxes` and `classes` to have matching dimensions" + ValueError, + "Expected `boxes` and `classes` to have matching dimensions", ): bounding_box.validate_format( {"boxes": tf.ones((4, 3, 6)), "classes": tf.ones((4, 6))} diff --git a/keras_cv/callbacks/__init__.py b/keras_cv/callbacks/__init__.py index 55f7877357..accbf3af62 100644 --- a/keras_cv/callbacks/__init__.py +++ b/keras_cv/callbacks/__init__.py @@ -19,7 +19,9 @@ ) try: - from keras_cv.callbacks.waymo_evaluation_callback import WaymoEvaluationCallback + from keras_cv.callbacks.waymo_evaluation_callback import ( + WaymoEvaluationCallback, + ) except ImportError: print( "You do not have Waymo Open Dataset installed, so KerasCV Waymo metrics are not available." diff --git a/keras_cv/callbacks/pycoco_callback.py b/keras_cv/callbacks/pycoco_callback.py index a9199c99ee..ac72a5c9fb 100644 --- a/keras_cv/callbacks/pycoco_callback.py +++ b/keras_cv/callbacks/pycoco_callback.py @@ -19,7 +19,9 @@ class PyCOCOCallback(Callback): - def __init__(self, validation_data, bounding_box_format, cache=True, **kwargs): + def __init__( + self, validation_data, bounding_box_format, cache=True, **kwargs + ): """Creates a callback to evaluate PyCOCO metrics on a validation dataset. 
Args: @@ -62,7 +64,8 @@ def boxes_only(images, boxes): gt = [boxes for boxes in self.val_data.map(boxes_only)] gt_boxes = tf.concat( - [tf.RaggedTensor.from_tensor(boxes["boxes"]) for boxes in gt], axis=0 + [tf.RaggedTensor.from_tensor(boxes["boxes"]) for boxes in gt], + axis=0, ) gt_classes = tf.concat( [tf.RaggedTensor.from_tensor(boxes["classes"]) for boxes in gt], @@ -84,7 +87,9 @@ def boxes_only(images, boxes): ground_truth = {} ground_truth["source_id"] = [source_ids] - ground_truth["height"] = [tf.tile(tf.constant([height]), [total_images])] + ground_truth["height"] = [ + tf.tile(tf.constant([height]), [total_images]) + ] ground_truth["width"] = [tf.tile(tf.constant([width]), [total_images])] ground_truth["num_detections"] = [gt_boxes.row_lengths(axis=1)] diff --git a/keras_cv/callbacks/pycoco_callback_test.py b/keras_cv/callbacks/pycoco_callback_test.py index 9f33919662..5f71fd0059 100644 --- a/keras_cv/callbacks/pycoco_callback_test.py +++ b/keras_cv/callbacks/pycoco_callback_test.py @@ -18,7 +18,9 @@ import keras_cv from keras_cv.callbacks import PyCOCOCallback from keras_cv.metrics.coco.pycoco_wrapper import METRIC_NAMES -from keras_cv.models.object_detection.__test_utils__ import _create_bounding_box_dataset +from keras_cv.models.object_detection.__test_utils__ import ( + _create_bounding_box_dataset, +) class PyCOCOCallbackTest(tf.test.TestCase): @@ -50,7 +52,9 @@ def test_model_fit_retinanet(self): bounding_box_format="xyxy", use_dictionary_box_format=True ) - callback = PyCOCOCallback(validation_data=val_ds, bounding_box_format="xyxy") + callback = PyCOCOCallback( + validation_data=val_ds, bounding_box_format="xyxy" + ) history = model.fit(train_ds, callbacks=[callback]) self.assertAllInSet( diff --git a/keras_cv/callbacks/waymo_evaluation_callback.py b/keras_cv/callbacks/waymo_evaluation_callback.py index 50d748298b..d63344466f 100644 --- a/keras_cv/callbacks/waymo_evaluation_callback.py +++ b/keras_cv/callbacks/waymo_evaluation_callback.py @@ -82,7 +82,9 @@ def boxes_only(point_clouds, target): gt_boxes = tf.reshape(gt_boxes, (num_frames * boxes_per_gt_frame, 9)) # Remove boxes with class of -1 (these are non-boxes that come from padding) - gt_real_boxes = tf.not_equal(gt_boxes[:, CENTER_XYZ_DXDYDZ_PHI.CLASS], -1) + gt_real_boxes = tf.not_equal( + gt_boxes[:, CENTER_XYZ_DXDYDZ_PHI.CLASS], -1 + ) gt_boxes = tf.boolean_mask(gt_boxes, gt_real_boxes) frame_ids = tf.cast(tf.linspace(1, num_frames, num_frames), tf.int64) @@ -91,7 +93,9 @@ def boxes_only(point_clouds, target): ground_truth["ground_truth_frame_id"] = tf.boolean_mask( tf.repeat(frame_ids, boxes_per_gt_frame), gt_real_boxes ) - ground_truth["ground_truth_bbox"] = gt_boxes[:, : CENTER_XYZ_DXDYDZ_PHI.PHI + 1] + ground_truth["ground_truth_bbox"] = gt_boxes[ + :, : CENTER_XYZ_DXDYDZ_PHI.PHI + 1 + ] ground_truth["ground_truth_type"] = tf.cast( gt_boxes[:, CENTER_XYZ_DXDYDZ_PHI.CLASS], tf.uint8 ) @@ -101,8 +105,12 @@ def boxes_only(point_clouds, target): boxes_per_pred_frame = predicted_boxes.shape[1] total_predicted_boxes = boxes_per_pred_frame * num_frames - predicted_boxes = tf.reshape(predicted_boxes, (total_predicted_boxes, 7)) - predicted_classes = tf.reshape(predicted_classes, (total_predicted_boxes, 2)) + predicted_boxes = tf.reshape( + predicted_boxes, (total_predicted_boxes, 7) + ) + predicted_classes = tf.reshape( + predicted_classes, (total_predicted_boxes, 2) + ) # Remove boxes with class of -1 (these are non-boxes that come from padding) pred_real_boxes = tf.reduce_all(predicted_classes != -1, axis=[-1]) 
predicted_boxes = tf.boolean_mask(predicted_boxes, pred_real_boxes) @@ -117,7 +125,9 @@ def boxes_only(point_clouds, target): predictions["prediction_type"] = tf.cast( tf.argmax(predicted_classes, axis=-1), tf.uint8 ) - predictions["prediction_score"] = tf.reduce_max(predicted_classes, axis=-1) + predictions["prediction_score"] = tf.reduce_max( + predicted_classes, axis=-1 + ) predictions["prediction_overlap_nlz"] = tf.cast( tf.zeros(predicted_boxes.shape[0]), tf.bool ) diff --git a/keras_cv/callbacks/waymo_evaluation_callback_test.py b/keras_cv/callbacks/waymo_evaluation_callback_test.py index 2d9d85ac26..afeea7b8c2 100644 --- a/keras_cv/callbacks/waymo_evaluation_callback_test.py +++ b/keras_cv/callbacks/waymo_evaluation_callback_test.py @@ -44,7 +44,8 @@ def test_model_fit(self): [ tf.random.uniform((NUM_RECORDS // 2, NUM_BOXES, BOX_FEATURES)), tf.cast( - tf.fill((NUM_RECORDS // 2, NUM_BOXES, BOX_FEATURES), -1), tf.float32 + tf.fill((NUM_RECORDS // 2, NUM_BOXES, BOX_FEATURES), -1), + tf.float32, ), ], axis=0, diff --git a/keras_cv/core/__init__.py b/keras_cv/core/__init__.py index badde3e14b..e2ed7d7e13 100644 --- a/keras_cv/core/__init__.py +++ b/keras_cv/core/__init__.py @@ -11,7 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from keras_cv.core.factor_sampler.constant_factor_sampler import ConstantFactorSampler +from keras_cv.core.factor_sampler.constant_factor_sampler import ( + ConstantFactorSampler, +) from keras_cv.core.factor_sampler.factor_sampler import FactorSampler -from keras_cv.core.factor_sampler.normal_factor_sampler import NormalFactorSampler -from keras_cv.core.factor_sampler.uniform_factor_sampler import UniformFactorSampler +from keras_cv.core.factor_sampler.normal_factor_sampler import ( + NormalFactorSampler, +) +from keras_cv.core.factor_sampler.uniform_factor_sampler import ( + UniformFactorSampler, +) diff --git a/keras_cv/core/factor_sampler/normal_factor_sampler_test_.py b/keras_cv/core/factor_sampler/normal_factor_sampler_test_.py index b8b6305b27..b216b3f5ce 100644 --- a/keras_cv/core/factor_sampler/normal_factor_sampler_test_.py +++ b/keras_cv/core/factor_sampler/normal_factor_sampler_test_.py @@ -19,11 +19,15 @@ class NormalFactorTest(tf.test.TestCase): def test_sample(self): - factor = core.NormalFactor(mean=0.5, stddev=0.2, min_value=0, max_value=1) + factor = core.NormalFactor( + mean=0.5, stddev=0.2, min_value=0, max_value=1 + ) self.assertTrue(0 <= factor() <= 1) def test_config(self): - factor = core.NormalFactor(mean=0.5, stddev=0.2, min_value=0, max_value=1) + factor = core.NormalFactor( + mean=0.5, stddev=0.2, min_value=0, max_value=1 + ) config = factor.get_config() self.assertEqual(config["mean"], 0.5) self.assertEqual(config["stddev"], 0.2) diff --git a/keras_cv/core/factor_sampler/uniform_factor_sampler.py b/keras_cv/core/factor_sampler/uniform_factor_sampler.py index 02896c9a27..8355954ea4 100644 --- a/keras_cv/core/factor_sampler/uniform_factor_sampler.py +++ b/keras_cv/core/factor_sampler/uniform_factor_sampler.py @@ -43,7 +43,11 @@ def __init__(self, lower, upper, seed=None): def __call__(self, shape=(), dtype="float32"): return tf.random.uniform( - shape, seed=self.seed, minval=self.lower, maxval=self.upper, dtype=dtype + shape, + seed=self.seed, + minval=self.lower, + maxval=self.upper, + dtype=dtype, ) def get_config(self): diff --git a/keras_cv/datasets/imagenet/load.py 
b/keras_cv/datasets/imagenet/load.py index e6c834e43b..bda41c9d38 100644 --- a/keras_cv/datasets/imagenet/load.py +++ b/keras_cv/datasets/imagenet/load.py @@ -44,7 +44,9 @@ def apply(example): image = resizing(image) # Decode label - label = tf.cast(tf.reshape(parsed[label_key], shape=()), dtype=tf.int32) - 1 + label = ( + tf.cast(tf.reshape(parsed[label_key], shape=()), dtype=tf.int32) - 1 + ) label = tf.one_hot(label, 1000) return image, label @@ -92,7 +94,9 @@ def load( """ if batch_size is not None and img_size is None: - raise ValueError("Batching can only be performed if images are resized.") + raise ValueError( + "Batching can only be performed if images are resized." + ) num_splits = 1024 if split == "train" else 128 filenames = [ diff --git a/keras_cv/datasets/pascal_voc/segmentation.py b/keras_cv/datasets/pascal_voc/segmentation.py index 208f63b987..a070df8175 100644 --- a/keras_cv/datasets/pascal_voc/segmentation.py +++ b/keras_cv/datasets/pascal_voc/segmentation.py @@ -43,7 +43,9 @@ class and instance segmentation masks. import tensorflow as tf import tensorflow_datasets as tfds -VOC_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar" +VOC_URL = ( + "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar" +) """ @InProceedings{{BharathICCV2011, @@ -212,7 +214,9 @@ def _get_image_ids(data_dir, split): # "diff": "diff.txt", } with tf.io.gfile.GFile( - os.path.join(data_dir, "ImageSets", "Segmentation", data_file_mapping[split]), + os.path.join( + data_dir, "ImageSets", "Segmentation", data_file_mapping[split] + ), "r", ) as f: image_ids = f.read().splitlines() @@ -241,7 +245,9 @@ def _parse_single_image(image_file_path): object_segmentation_file_path = os.path.join( data_dir, "SegmentationObject", image_id + ".png" ) - annotation_file_path = os.path.join(data_dir, "Annotations", image_id + ".xml") + annotation_file_path = os.path.join( + data_dir, "Annotations", image_id + ".xml" + ) image_annotations = _parse_annotation_data(annotation_file_path) result = { @@ -261,8 +267,12 @@ def _parse_single_sbd_image(image_file_path): data_dir, image_file_name = os.path.split(image_file_path) data_dir = os.path.normpath(os.path.join(data_dir, os.path.pardir)) image_id, _ = os.path.splitext(image_file_name) - class_segmentation_file_path = os.path.join(data_dir, "cls", image_id + ".mat") - object_segmentation_file_path = os.path.join(data_dir, "inst", image_id + ".mat") + class_segmentation_file_path = os.path.join( + data_dir, "cls", image_id + ".mat" + ) + object_segmentation_file_path = os.path.join( + data_dir, "inst", image_id + ".mat" + ) result = { "image/filename": image_id + ".jpg", "image/file_path": image_file_path, @@ -308,7 +318,9 @@ def _build_metadata(data_dir, image_ids): def _build_sbd_metadata(data_dir, image_ids): # Parallel process all the images. 
- image_file_paths = [os.path.join(data_dir, "img", i + ".jpg") for i in image_ids] + image_file_paths = [ + os.path.join(data_dir, "img", i + ".jpg") for i in image_ids + ] pool_size = 10 if len(image_ids) > 10 else len(image_ids) with multiprocessing.Pool(pool_size) as p: metadata = p.map(_parse_single_sbd_image, image_file_paths) @@ -369,14 +381,20 @@ def _load_sbd_images(image_file_path, seg_cls_file_path, seg_obj_file_path): image = tf.io.read_file(image_file_path) image = tf.image.decode_jpeg(image) - segmentation_class_mask = tfds.core.lazy_imports.scipy.io.loadmat(seg_cls_file_path) - segmentation_class_mask = segmentation_class_mask["GTcls"]["Segmentation"][0][0] + segmentation_class_mask = tfds.core.lazy_imports.scipy.io.loadmat( + seg_cls_file_path + ) + segmentation_class_mask = segmentation_class_mask["GTcls"]["Segmentation"][ + 0 + ][0] segmentation_class_mask = segmentation_class_mask[..., np.newaxis] segmentation_object_mask = tfds.core.lazy_imports.scipy.io.loadmat( seg_obj_file_path ) - segmentation_object_mask = segmentation_object_mask["GTinst"]["Segmentation"][0][0] + segmentation_object_mask = segmentation_object_mask["GTinst"][ + "Segmentation" + ][0][0] segmentation_object_mask = segmentation_object_mask[..., np.newaxis] return { @@ -459,7 +477,13 @@ def load( download the data file, and unzip. It will be used as a cach directory. Default to None, and `~/.keras/pascal_voc_2012` will be used. """ - supported_split_value = ["train", "eval", "trainval", "sbd_train", "sbd_eval"] + supported_split_value = [ + "train", + "eval", + "trainval", + "sbd_train", + "sbd_eval", + ] if split not in supported_split_value: raise ValueError( f"The support value for `split` are {supported_split_value}. " diff --git a/keras_cv/datasets/pascal_voc/segmentation_test.py b/keras_cv/datasets/pascal_voc/segmentation_test.py index b9ae7be8d2..5c40f3bc87 100644 --- a/keras_cv/datasets/pascal_voc/segmentation_test.py +++ b/keras_cv/datasets/pascal_voc/segmentation_test.py @@ -31,7 +31,10 @@ def setUp(self): # FLAGS.test_srcdir self.test_data_tar_path = os.path.abspath( os.path.join( - os.path.abspath(__file__), os.path.pardir, "test_data", "VOC_mini.tar" + os.path.abspath(__file__), + os.path.pardir, + "test_data", + "VOC_mini.tar", ) ) @@ -63,7 +66,9 @@ def test_download_data(self): "SegmentationObject", ] for sub_dir in expected_subdirs: - self.assertTrue(os.path.exists(os.path.join(test_data_dir, sub_dir))) + self.assertTrue( + os.path.exists(os.path.join(test_data_dir, sub_dir)) + ) def test_skip_download_and_override(self): local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/") @@ -83,7 +88,9 @@ def test_skip_download_and_override(self): override_extract=False, ) self.assertTrue( - os.path.exists(os.path.join(test_data_dir, "Annotations", "dummy_dir")) + os.path.exists( + os.path.join(test_data_dir, "Annotations", "dummy_dir") + ) ) def test_get_image_ids(self): @@ -96,8 +103,12 @@ def test_get_image_ids(self): train_ids = ["2007_000032", "2007_000039", "2007_000063"] eval_ids = ["2007_000033"] train_eval_ids = train_ids + eval_ids - self.assertEquals(segmentation._get_image_ids(data_dir, "train"), train_ids) - self.assertEquals(segmentation._get_image_ids(data_dir, "eval"), eval_ids) + self.assertEquals( + segmentation._get_image_ids(data_dir, "train"), train_ids + ) + self.assertEquals( + segmentation._get_image_ids(data_dir, "eval"), eval_ids + ) self.assertEquals( segmentation._get_image_ids(data_dir, "trainval"), train_eval_ids ) @@ -110,7 +121,9 @@ def 
test_parse_annotation_file(self): local_dir_path=local_data_dir, ) # One of the train file. - annotation_file = os.path.join(data_dir, "Annotations", "2007_000032.xml") + annotation_file = os.path.join( + data_dir, "Annotations", "2007_000032.xml" + ) metadata = segmentation._parse_annotation_data(annotation_file) expected_result = { "height": 281, @@ -155,18 +168,28 @@ def test_decode_png_mask(self): extracted_dir=extracted_dir, local_dir_path=local_data_dir, ) - mask_file = os.path.join(data_dir, "SegmentationClass", "2007_000032.png") + mask_file = os.path.join( + data_dir, "SegmentationClass", "2007_000032.png" + ) mask = tf.io.decode_png(tf.io.read_file(mask_file)) segmentation._maybe_populate_voc_color_mapping() mask = segmentation._decode_png_mask(mask) self.assertEquals(mask.shape, (281, 500, 1)) - self.assertEquals(tf.reduce_max(mask), 255) # The 255 value is for the boundary - self.assertEquals(tf.reduce_min(mask), 0) # The 0 value is for the background + self.assertEquals( + tf.reduce_max(mask), 255 + ) # The 255 value is for the boundary + self.assertEquals( + tf.reduce_min(mask), 0 + ) # The 0 value is for the background # The mask contains two classes, 1 and 15, see the label section in the previous # test case. - self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(mask, 1), tf.int32)), 4734) - self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(mask, 15), tf.int32)), 866) + self.assertEquals( + tf.reduce_sum(tf.cast(tf.equal(mask, 1), tf.int32)), 4734 + ) + self.assertEquals( + tf.reduce_sum(tf.cast(tf.equal(mask, 15), tf.int32)), 866 + ) def test_parse_single_image(self): local_data_dir = os.path.join(self.tempdir, "pascal_voc_2012/") @@ -292,9 +315,17 @@ def test_build_dataset(self): # Check the mask png content png = entry["class_segmentation"] self.assertEquals(png.shape, (281, 500, 1)) - self.assertEquals(tf.reduce_max(png), 255) # The 255 value is for the boundary - self.assertEquals(tf.reduce_min(png), 0) # The 0 value is for the background + self.assertEquals( + tf.reduce_max(png), 255 + ) # The 255 value is for the boundary + self.assertEquals( + tf.reduce_min(png), 0 + ) # The 0 value is for the background # The mask contains two classes, 1 and 15, see the label section in the previous # test case. 
- self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(png, 1), tf.int32)), 4734) - self.assertEquals(tf.reduce_sum(tf.cast(tf.equal(png, 15), tf.int32)), 866) + self.assertEquals( + tf.reduce_sum(tf.cast(tf.equal(png, 1), tf.int32)), 4734 + ) + self.assertEquals( + tf.reduce_sum(tf.cast(tf.equal(png, 15), tf.int32)), 866 + ) diff --git a/keras_cv/datasets/waymo/load.py b/keras_cv/datasets/waymo/load.py index 4ef801019f..b186984a6d 100644 --- a/keras_cv/datasets/waymo/load.py +++ b/keras_cv/datasets/waymo/load.py @@ -81,5 +81,6 @@ def simple_transformer(frame): ) segments = tf.data.TFRecordDataset(filenames) return tf.data.Dataset.from_generator( - _generate_frames(segments, transformer), output_signature=output_signature + _generate_frames(segments, transformer), + output_signature=output_signature, ) diff --git a/keras_cv/datasets/waymo/load_test.py b/keras_cv/datasets/waymo/load_test.py index e91bc7d13f..b6ea9cd582 100644 --- a/keras_cv/datasets/waymo/load_test.py +++ b/keras_cv/datasets/waymo/load_test.py @@ -33,7 +33,8 @@ def setUp(self): self.test_data_file = "wod_one_frame.tfrecord" @pytest.mark.skipif( - "TEST_WAYMO_DEPS" not in os.environ or os.environ["TEST_WAYMO_DEPS"] != "true", + "TEST_WAYMO_DEPS" not in os.environ + or os.environ["TEST_WAYMO_DEPS"] != "true", reason="Requires Waymo Open Dataset package", ) def test_load_from_directory(self): @@ -46,7 +47,8 @@ def test_load_from_directory(self): self.assertNotEqual(dataset[0]["timestamp_micros"], 0) @pytest.mark.skipif( - "TEST_WAYMO_DEPS" not in os.environ or os.environ["TEST_WAYMO_DEPS"] != "true", + "TEST_WAYMO_DEPS" not in os.environ + or os.environ["TEST_WAYMO_DEPS"] != "true", reason="Requires Waymo Open Dataset package", ) def test_load_from_files(self): diff --git a/keras_cv/datasets/waymo/transformer.py b/keras_cv/datasets/waymo/transformer.py index a8dc3ad96d..fd51c279c6 100644 --- a/keras_cv/datasets/waymo/transformer.py +++ b/keras_cv/datasets/waymo/transformer.py @@ -92,7 +92,8 @@ def _decode_range_images(frame) -> Dict[int, List[tf.Tensor]]: ri = dataset_pb2.MatrixFloat() ri.ParseFromString(bytearray(range_image_str_tensor.numpy())) ri_tensor = tf.reshape( - tf.convert_to_tensor(value=ri.data, dtype=tf.float32), ri.shape.dims + tf.convert_to_tensor(value=ri.data, dtype=tf.float32), + ri.shape.dims, ) range_images[lidar.name].append(ri_tensor) return range_images @@ -107,7 +108,9 @@ def _get_range_image_top_pose(frame) -> tf.Tensor: Returns: Pose tensors for the range image. 
""" - _, _, _, ri_pose = frame_utils.parse_range_image_and_camera_projection(frame) + _, _, _, ri_pose = frame_utils.parse_range_image_and_camera_projection( + frame + ) assert ri_pose ri_pose_tensor = tf.reshape( tf.convert_to_tensor(value=ri_pose.data), ri_pose.shape.dims @@ -184,11 +187,19 @@ def _get_point_top_lidar( if i == 0: has_second_return = range_image[1][:, :, 0] > 0 - has_second_return_list.append(tf.gather_nd(has_second_return, mask_idx)) - is_second_return_list.append(tf.zeros([mask_idx.shape[0]], dtype=tf.bool)) + has_second_return_list.append( + tf.gather_nd(has_second_return, mask_idx) + ) + is_second_return_list.append( + tf.zeros([mask_idx.shape[0]], dtype=tf.bool) + ) else: - has_second_return_list.append(tf.zeros([mask_idx.shape[0]], dtype=tf.bool)) - is_second_return_list.append(tf.ones([mask_idx.shape[0]], dtype=tf.bool)) + has_second_return_list.append( + tf.zeros([mask_idx.shape[0]], dtype=tf.bool) + ) + is_second_return_list.append( + tf.ones([mask_idx.shape[0]], dtype=tf.bool) + ) xyz = tf.concat(xyz_list, axis=0) feature = tf.concat(feature_list, axis=0) @@ -209,7 +220,9 @@ def _get_point_top_lidar( ], axis=-1, ) - sensor_id = tf.ones([xyz.shape[0], 1], dtype=tf.int32) * dataset_pb2.LaserName.TOP + sensor_id = ( + tf.ones([xyz.shape[0], 1], dtype=tf.int32) * dataset_pb2.LaserName.TOP + ) ri_row_col_sensor_id = tf.concat([row_col, sensor_id], axis=-1) return struct.PointTensors( @@ -275,10 +288,15 @@ def _get_point_lidar( assert len(ri_tensor) == 1, f"{sensor_id}" ri_tensor = ri_tensor[0] calibration = _get_lidar_calibration(frame, sensor_id) - extrinsic = tf.reshape(np.array(calibration.extrinsic.transform), [4, 4]) + extrinsic = tf.reshape( + np.array(calibration.extrinsic.transform), [4, 4] + ) beam_inclinations = range_image_utils.compute_inclination( tf.constant( - [calibration.beam_inclination_min, calibration.beam_inclination_max] + [ + calibration.beam_inclination_min, + calibration.beam_inclination_max, + ] ), height=ri_tensor.shape[0], ) @@ -294,7 +312,8 @@ def _get_point_lidar( xyz = tf.gather_nd(tf.squeeze(xyz, axis=0), mask_idx) feature = tf.gather_nd(ri_tensor[:, :, 1:3], mask_idx) feature = tf.concat( - [feature, tf.zeros([feature.shape[0], 2], dtype=tf.float32)], axis=-1 + [feature, tf.zeros([feature.shape[0], 2], dtype=tf.float32)], + axis=-1, ) nlz = tf.gather_nd(ri_tensor[:, :, -1] > 0, mask_idx) @@ -303,7 +322,10 @@ def _get_point_lidar( nlz_list.append(nlz) ri_row_col_sensor_id_list.append( tf.concat( - [mask_idx, sensor_id * tf.ones([nlz.shape[0], 1], dtype=tf.int32)], + [ + mask_idx, + sensor_id * tf.ones([nlz.shape[0], 1], dtype=tf.int32), + ], axis=-1, ) ) @@ -339,12 +361,15 @@ def _get_point(frame, max_num_lidar_points: int) -> struct.PointTensors: ) range_images.pop(dataset_pb2.LaserName.TOP) - point_tensors_lidar = _get_point_lidar(range_images, frame, max_num_lidar_points) + point_tensors_lidar = _get_point_lidar( + range_images, frame, max_num_lidar_points + ) merged = {} for key in vars(point_tensors_lidar).keys(): merged[key] = tf.concat( - [getattr(point_tensors_lidar, key), getattr(point_top_lidar, key)], axis=0 + [getattr(point_tensors_lidar, key), getattr(point_top_lidar, key)], + axis=0, ) return struct.PointTensors(**merged) @@ -402,7 +427,9 @@ def _get_point_label_box( ) box_class_list.append(model_object_type) box_id = tf.bitcast( - tf.fingerprint(tf.expand_dims(label.id.encode(encoding="ascii"), 0))[0], + tf.fingerprint( + tf.expand_dims(label.id.encode(encoding="ascii"), 0) + )[0], tf.int64, ) box_id_list.append(box_id) 
@@ -417,7 +444,14 @@ def _get_point_label_box( box_detection_difficulty = tf.constant( box_detection_difficulty_list, dtype=tf.int32 ) - return (box_3d, box_meta, box_class, box_id, box_density, box_detection_difficulty) + return ( + box_3d, + box_meta, + box_class, + box_id, + box_density, + box_detection_difficulty, + ) def _get_box_class_per_point( @@ -445,7 +479,9 @@ def _get_box_class_per_point( # [N] point_box_idx = tf.math.argmax(point_in_box, axis=-1, output_type=tf.int32) # [N] - point_box_class = tf.where(point_in_any_box, tf.gather(box_class, point_box_idx), 0) + point_box_class = tf.where( + point_in_any_box, tf.gather(box_class, point_box_idx), 0 + ) return point_box_class @@ -502,7 +538,9 @@ def _point_vehicle_to_global( ) -def _point_global_to_vehicle(point_xyz: tf.Tensor, sdc_pose: tf.Tensor) -> tf.Tensor: +def _point_global_to_vehicle( + point_xyz: tf.Tensor, sdc_pose: tf.Tensor +) -> tf.Tensor: """Transforms points from global to vehicle frame. Args: @@ -520,7 +558,9 @@ def _point_global_to_vehicle(point_xyz: tf.Tensor, sdc_pose: tf.Tensor) -> tf.Te ) -def _box_3d_vehicle_to_global(box_3d: tf.Tensor, sdc_pose: tf.Tensor) -> tf.Tensor: +def _box_3d_vehicle_to_global( + box_3d: tf.Tensor, sdc_pose: tf.Tensor +) -> tf.Tensor: """Transforms 3D boxes from vehicle to global frame. Args: @@ -536,13 +576,16 @@ def _box_3d_vehicle_to_global(box_3d: tf.Tensor, sdc_pose: tf.Tensor) -> tf.Tens new_center = _point_vehicle_to_global(center, sdc_pose) new_heading = ( - heading + tf.atan2(sdc_pose[..., 1, 0], sdc_pose[..., 0, 0])[..., tf.newaxis] + heading + + tf.atan2(sdc_pose[..., 1, 0], sdc_pose[..., 0, 0])[..., tf.newaxis] ) return tf.concat([new_center, dim, new_heading[..., tf.newaxis]], axis=-1) -def _box_3d_global_to_vehicle(box_3d: tf.Tensor, sdc_pose: tf.Tensor) -> tf.Tensor: +def _box_3d_global_to_vehicle( + box_3d: tf.Tensor, sdc_pose: tf.Tensor +) -> tf.Tensor: """Transforms 3D boxes from global to vehicle frame. Args: @@ -558,7 +601,8 @@ def _box_3d_global_to_vehicle(box_3d: tf.Tensor, sdc_pose: tf.Tensor) -> tf.Tens new_center = _point_global_to_vehicle(center, sdc_pose) new_heading = ( - heading + tf.atan2(sdc_pose[..., 0, 1], sdc_pose[..., 0, 0])[..., tf.newaxis] + heading + + tf.atan2(sdc_pose[..., 0, 1], sdc_pose[..., 0, 0])[..., tf.newaxis] ) return tf.concat([new_center, dim, new_heading[..., tf.newaxis]], axis=-1) @@ -580,23 +624,26 @@ def build_tensors_from_wod_frame(frame) -> Dict[str, tf.Tensor]: "keras_cv.datasets.waymo.build_tensors_from_wod_frame()" ) - frame_id_bytes = "{}_{}".format(frame.context.name, frame.timestamp_micros).encode( - encoding="ascii" - ) + frame_id_bytes = "{}_{}".format( + frame.context.name, frame.timestamp_micros + ).encode(encoding="ascii") frame_id = tf.bitcast( tf.fingerprint(tf.expand_dims(frame_id_bytes, 0))[0], tf.int64 ) timestamp_micros = tf.constant(frame.timestamp_micros, dtype=tf.int64) pose = tf.convert_to_tensor( - value=np.reshape(np.array(frame.pose.transform), [4, 4]), dtype_hint=tf.float32 + value=np.reshape(np.array(frame.pose.transform), [4, 4]), + dtype_hint=tf.float32, ) point_tensors = _get_point(frame, _MAX_NUM_NON_TOP_LIDAR_POINTS) point_label_tensors = _get_point_label(frame, point_tensors.point_xyz) # Transforms lidar frames to global coordinates. 
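# The vehicle-to-global conversion applied below is a rigid transform by
# the frame's 4x4 pose. A minimal stand-alone sketch (assuming, as in
# this module, that `sdc_pose` maps vehicle coordinates into the global
# frame; the function name is illustrative):
import tensorflow as tf

def point_vehicle_to_global_sketch(point_xyz, sdc_pose):
    # point_xyz: [..., num_points, 3]; sdc_pose: [..., 4, 4].
    rotation = sdc_pose[..., :3, :3]
    translation = sdc_pose[..., :3, 3]
    return (
        tf.einsum("...ij,...nj->...ni", rotation, point_xyz)
        + translation[..., tf.newaxis, :]
    )

# Box headings only need the yaw of the same pose, which is why
# _box_3d_vehicle_to_global above adds
# atan2(sdc_pose[..., 1, 0], sdc_pose[..., 0, 0]) to the heading.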
- point_tensors.point_xyz = _point_vehicle_to_global(point_tensors.point_xyz, pose) + point_tensors.point_xyz = _point_vehicle_to_global( + point_tensors.point_xyz, pose + ) point_label_tensors.label_box = _box_3d_vehicle_to_global( point_label_tensors.label_box, pose ) @@ -671,7 +718,9 @@ def _pad_fn(t: tf.Tensor, max_counts: int) -> tf.Tensor: return frame -def transform_to_vehicle_frame(frame: Dict[str, tf.Tensor]) -> Dict[str, tf.Tensor]: +def transform_to_vehicle_frame( + frame: Dict[str, tf.Tensor] +) -> Dict[str, tf.Tensor]: """Transform tensors in a frame from global coordinates to vehicle coordinates. Args: @@ -746,9 +795,15 @@ def build_tensors_for_augmentation( boxes = tf.concat( [ frame["label_box"][tf.newaxis, :], - tf.cast(frame["label_box_class"], tf.float32)[tf.newaxis, :, tf.newaxis], - tf.cast(frame["label_box_mask"], tf.float32)[tf.newaxis, :, tf.newaxis], - tf.cast(frame["label_box_density"], tf.float32)[tf.newaxis, :, tf.newaxis], + tf.cast(frame["label_box_class"], tf.float32)[ + tf.newaxis, :, tf.newaxis + ], + tf.cast(frame["label_box_mask"], tf.float32)[ + tf.newaxis, :, tf.newaxis + ], + tf.cast(frame["label_box_density"], tf.float32)[ + tf.newaxis, :, tf.newaxis + ], tf.cast(frame["label_box_detection_difficulty"], tf.float32)[ tf.newaxis, :, tf.newaxis ], diff --git a/keras_cv/datasets/waymo/transformer_test.py b/keras_cv/datasets/waymo/transformer_test.py index 72454bda52..07f150ea2b 100644 --- a/keras_cv/datasets/waymo/transformer_test.py +++ b/keras_cv/datasets/waymo/transformer_test.py @@ -33,7 +33,8 @@ def setUp(self): ) @pytest.mark.skipif( - "TEST_WAYMO_DEPS" not in os.environ or os.environ["TEST_WAYMO_DEPS"] != "true", + "TEST_WAYMO_DEPS" not in os.environ + or os.environ["TEST_WAYMO_DEPS"] != "true", reason="Requires Waymo Open Dataset package", ) def test_load_and_transform(self): @@ -56,8 +57,12 @@ def test_load_and_transform(self): # Laser points. point_xyz_mean = tf.reduce_mean(lidar_tensors["point_xyz"], axis=0) - self.assertAllClose(point_xyz_mean, lidar_tensors["pose"][:3, 3], atol=100) - point_feature_mean = tf.reduce_mean(lidar_tensors["point_feature"], axis=0) + self.assertAllClose( + point_xyz_mean, lidar_tensors["pose"][:3, 3], atol=100 + ) + point_feature_mean = tf.reduce_mean( + lidar_tensors["point_feature"], axis=0 + ) self.assertAllGreater(point_feature_mean[0], 0) self.assertAllGreater(tf.abs(point_feature_mean[1]), 1e-6) self.assertAllGreater(point_feature_mean[2:4], 0) @@ -69,7 +74,9 @@ def test_load_and_transform(self): self.assertEqual(lidar_tensors["label_box_class"].shape[0], num_boxes) self.assertEqual(lidar_tensors["label_box_density"].shape[0], num_boxes) self.assertTrue(tf.math.reduce_all(lidar_tensors["label_box_mask"])) - self.assertAllGreater(tf.math.reduce_max(lidar_tensors["label_point_class"]), 0) + self.assertAllGreater( + tf.math.reduce_max(lidar_tensors["label_point_class"]), 0 + ) # Multi-frame tensors for augmentation. 
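# The "bounding_boxes" tensor built by build_tensors_for_augmentation
# above packs the 7 box parameters (center x/y/z, dimensions, heading)
# together with the class, mask, density and detection-difficulty
# scalars, in that order, which is why the assertion below expects a
# trailing dimension of 7 + 4 = 11. A sketch of slicing the fields back
# out (variable names are illustrative; the 7-parameter box layout is
# assumed from _box_3d_vehicle_to_global above):
import tensorflow as tf

boxes = tf.zeros([16, 11])  # stand-in for augmented_example["bounding_boxes"]
box_3d = boxes[:, :7]       # center (3), dimensions (3), heading (1)
box_class = tf.cast(boxes[:, 7], tf.int32)
box_mask = tf.cast(boxes[:, 8], tf.bool)
box_density = boxes[:, 9]
box_difficulty = tf.cast(boxes[:, 10], tf.int32)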
augmented_example = next( @@ -79,7 +86,8 @@ def test_load_and_transform(self): self.assertEqual(augmented_example["bounding_boxes"].shape, [16, 11]) @pytest.mark.skipif( - "TEST_WAYMO_DEPS" not in os.environ or os.environ["TEST_WAYMO_DEPS"] != "true", + "TEST_WAYMO_DEPS" not in os.environ + or os.environ["TEST_WAYMO_DEPS"] != "true", reason="Requires Waymo Open Dataset package", ) def test_pad_and_transform_to_vehicle(self): @@ -112,4 +120,6 @@ def test_pad_and_transform_to_vehicle(self): self.assertEqual(example["label_box_density"].shape[0], 1000) self.assertEqual(example["label_box_mask"].shape, [1000]) self.assertTrue(tf.math.reduce_any(example["label_box_mask"])) - self.assertAllGreater(tf.math.reduce_max(example["label_point_class"]), 0) + self.assertAllGreater( + tf.math.reduce_max(example["label_point_class"]), 0 + ) diff --git a/keras_cv/keypoint/converters_test.py b/keras_cv/keypoint/converters_test.py index d3ec33b7e3..87c6407c1e 100644 --- a/keras_cv/keypoint/converters_test.py +++ b/keras_cv/keypoint/converters_test.py @@ -109,7 +109,9 @@ def add_metadata(ins): def test_raise_errors_when_missing_shape(self): with self.assertRaises(ValueError) as e: - keypoint.convert_format(keypoints["xy"], source="xy", target="rel_xy") + keypoint.convert_format( + keypoints["xy"], source="xy", target="rel_xy" + ) self.assertEqual( str(e.exception), diff --git a/keras_cv/keypoint/utils_test.py b/keras_cv/keypoint/utils_test.py index 90af04f131..dffa5d7a53 100644 --- a/keras_cv/keypoint/utils_test.py +++ b/keras_cv/keypoint/utils_test.py @@ -38,13 +38,17 @@ class UtilsTestCase(tf.test.TestCase, parameterized.TestCase): [[10.0, 20.0], [30.0, 40.0], [50.0, 50.0]], [2, 1] ), tf.zeros([50, 50, 3]), - tf.RaggedTensor.from_row_lengths([[10.0, 20.0], [30.0, 40.0]], [2, 0]), + tf.RaggedTensor.from_row_lengths( + [[10.0, 20.0], [30.0, 40.0]], [2, 0] + ), ), ( "height - width confusion", tf.constant([[[10.0, 20.0]], [[40.0, 30.0]], [[30.0, 40.0]]]), tf.zeros((50, 40, 3)), - tf.ragged.constant([[[10.0, 20.0]], [], [[30.0, 40.0]]], ragged_rank=1), + tf.ragged.constant( + [[[10.0, 20.0]], [], [[30.0, 40.0]]], ragged_rank=1 + ), ), ) def test_result(self, keypoints, image, expected): diff --git a/keras_cv/layers/__init__.py b/keras_cv/layers/__init__.py index de0cd11c04..bda93a7a2b 100644 --- a/keras_cv/layers/__init__.py +++ b/keras_cv/layers/__init__.py @@ -51,7 +51,9 @@ RandomAugmentationPipeline, ) from keras_cv.layers.preprocessing.random_brightness import RandomBrightness -from keras_cv.layers.preprocessing.random_channel_shift import RandomChannelShift +from keras_cv.layers.preprocessing.random_channel_shift import ( + RandomChannelShift, +) from keras_cv.layers.preprocessing.random_choice import RandomChoice from keras_cv.layers.preprocessing.random_color_degeneration import ( RandomColorDegeneration, @@ -59,10 +61,14 @@ from keras_cv.layers.preprocessing.random_color_jitter import RandomColorJitter from keras_cv.layers.preprocessing.random_contrast import RandomContrast from keras_cv.layers.preprocessing.random_crop import RandomCrop -from keras_cv.layers.preprocessing.random_crop_and_resize import RandomCropAndResize +from keras_cv.layers.preprocessing.random_crop_and_resize import ( + RandomCropAndResize, +) from keras_cv.layers.preprocessing.random_cutout import RandomCutout from keras_cv.layers.preprocessing.random_flip import RandomFlip -from keras_cv.layers.preprocessing.random_gaussian_blur import RandomGaussianBlur +from keras_cv.layers.preprocessing.random_gaussian_blur import ( + 
RandomGaussianBlur, +) from keras_cv.layers.preprocessing.random_hue import RandomHue from keras_cv.layers.preprocessing.random_jpeg_quality import RandomJpegQuality from keras_cv.layers.preprocessing.random_rotation import RandomRotation @@ -71,8 +77,12 @@ from keras_cv.layers.preprocessing.random_shear import RandomShear from keras_cv.layers.preprocessing.random_translation import RandomTranslation from keras_cv.layers.preprocessing.random_zoom import RandomZoom -from keras_cv.layers.preprocessing.randomly_zoomed_crop import RandomlyZoomedCrop -from keras_cv.layers.preprocessing.repeated_augmentation import RepeatedAugmentation +from keras_cv.layers.preprocessing.randomly_zoomed_crop import ( + RandomlyZoomedCrop, +) +from keras_cv.layers.preprocessing.repeated_augmentation import ( + RepeatedAugmentation, +) from keras_cv.layers.preprocessing.rescaling import Rescaling from keras_cv.layers.preprocessing.resizing import Resizing from keras_cv.layers.preprocessing.solarization import Solarization @@ -86,8 +96,12 @@ GlobalRandomDroppingPoints, ) from keras_cv.layers.preprocessing_3d.global_random_flip import GlobalRandomFlip -from keras_cv.layers.preprocessing_3d.global_random_rotation import GlobalRandomRotation -from keras_cv.layers.preprocessing_3d.global_random_scaling import GlobalRandomScaling +from keras_cv.layers.preprocessing_3d.global_random_rotation import ( + GlobalRandomRotation, +) +from keras_cv.layers.preprocessing_3d.global_random_scaling import ( + GlobalRandomScaling, +) from keras_cv.layers.preprocessing_3d.global_random_translation import ( GlobalRandomTranslation, ) diff --git a/keras_cv/layers/feature_pyramid.py b/keras_cv/layers/feature_pyramid.py index 069cf66e9d..8babbf7042 100644 --- a/keras_cv/layers/feature_pyramid.py +++ b/keras_cv/layers/feature_pyramid.py @@ -189,7 +189,9 @@ def build_feature_pyramid(self, input_features): # Post apply the output layers so that we don't leak them to the down stream level for level in reversed_levels: - output_features[level] = self.output_layers[level](output_features[level]) + output_features[level] = self.output_layers[level]( + output_features[level] + ) return output_features diff --git a/keras_cv/layers/feature_pyramid_test.py b/keras_cv/layers/feature_pyramid_test.py index 6bd3230230..95e746d818 100644 --- a/keras_cv/layers/feature_pyramid_test.py +++ b/keras_cv/layers/feature_pyramid_test.py @@ -75,27 +75,43 @@ def test_with_keras_input_tensor(self): def test_invalid_lateral_layers(self): lateral_layers = [tf.keras.layers.Conv2D(256, 1)] * 3 - with self.assertRaisesRegexp(ValueError, "Expect lateral_layers to be a dict"): - _ = FeaturePyramid(min_level=2, max_level=5, lateral_layers=lateral_layers) + with self.assertRaisesRegexp( + ValueError, "Expect lateral_layers to be a dict" + ): + _ = FeaturePyramid( + min_level=2, max_level=5, lateral_layers=lateral_layers + ) lateral_layers = { 2: tf.keras.layers.Conv2D(256, 1), 3: tf.keras.layers.Conv2D(256, 1), 4: tf.keras.layers.Conv2D(256, 1), } - with self.assertRaisesRegexp(ValueError, "with keys as .* [2, 3, 4, 5]"): - _ = FeaturePyramid(min_level=2, max_level=5, lateral_layers=lateral_layers) + with self.assertRaisesRegexp( + ValueError, "with keys as .* [2, 3, 4, 5]" + ): + _ = FeaturePyramid( + min_level=2, max_level=5, lateral_layers=lateral_layers + ) def test_invalid_output_layers(self): output_layers = [tf.keras.layers.Conv2D(256, 3)] * 3 - with self.assertRaisesRegexp(ValueError, "Expect output_layers to be a dict"): - _ = FeaturePyramid(min_level=2, 
max_level=5, output_layers=output_layers) + with self.assertRaisesRegexp( + ValueError, "Expect output_layers to be a dict" + ): + _ = FeaturePyramid( + min_level=2, max_level=5, output_layers=output_layers + ) output_layers = { 2: tf.keras.layers.Conv2D(256, 3), 3: tf.keras.layers.Conv2D(256, 3), 4: tf.keras.layers.Conv2D(256, 3), } - with self.assertRaisesRegexp(ValueError, "with keys as .* [2, 3, 4, 5]"): - _ = FeaturePyramid(min_level=2, max_level=5, output_layers=output_layers) + with self.assertRaisesRegexp( + ValueError, "with keys as .* [2, 3, 4, 5]" + ): + _ = FeaturePyramid( + min_level=2, max_level=5, output_layers=output_layers + ) def test_invalid_input_features(self): layer = FeaturePyramid(min_level=2, max_level=5) @@ -105,9 +121,13 @@ def test_invalid_input_features(self): c4 = tf.ones([2, 16, 16, 3]) c5 = tf.ones([2, 8, 8, 3]) list_input = [c2, c3, c4, c5] - with self.assertRaisesRegexp(ValueError, "expects input features to be a dict"): + with self.assertRaisesRegexp( + ValueError, "expects input features to be a dict" + ): layer(list_input) dict_input_with_missing_feature = {2: c2, 3: c3, 4: c4} - with self.assertRaisesRegexp(ValueError, "Expect feature keys.*[2, 3, 4, 5]"): + with self.assertRaisesRegexp( + ValueError, "Expect feature keys.*[2, 3, 4, 5]" + ): layer(dict_input_with_missing_feature) diff --git a/keras_cv/layers/fusedmbconv.py b/keras_cv/layers/fusedmbconv.py index b2b3da8648..beccb2dcbf 100644 --- a/keras_cv/layers/fusedmbconv.py +++ b/keras_cv/layers/fusedmbconv.py @@ -155,7 +155,9 @@ def __init__( ) self.bn3 = layers.BatchNormalization( - axis=BN_AXIS, momentum=self.bn_momentum, name=self.name + "project_bn" + axis=BN_AXIS, + momentum=self.bn_momentum, + name=self.name + "project_bn", ) def build(self, input_shape): diff --git a/keras_cv/layers/fusedmbconv_test.py b/keras_cv/layers/fusedmbconv_test.py index 42b7029a7e..64d5b59b55 100644 --- a/keras_cv/layers/fusedmbconv_test.py +++ b/keras_cv/layers/fusedmbconv_test.py @@ -47,7 +47,9 @@ def test_different_input_output_shapes(self): def test_squeeze_excitation_ratio(self): inputs = tf.random.normal(shape=(1, 64, 64, 32), dtype=tf.float32) - layer = FusedMBConvBlock(input_filters=32, output_filters=48, se_ratio=0.25) + layer = FusedMBConvBlock( + input_filters=32, output_filters=48, se_ratio=0.25 + ) output = layer(inputs) self.assertEquals(output.shape, [1, 64, 64, 48]) diff --git a/keras_cv/layers/mbconv.py b/keras_cv/layers/mbconv.py index 4209e1b27a..272d6398a4 100644 --- a/keras_cv/layers/mbconv.py +++ b/keras_cv/layers/mbconv.py @@ -115,7 +115,9 @@ def __init__( momentum=self.bn_momentum, name=self.name + "expand_bn", ) - self.act = layers.Activation(self.activation, name=self.name + "activation") + self.act = layers.Activation( + self.activation, name=self.name + "activation" + ) self.depthwise = layers.DepthwiseConv2D( kernel_size=self.kernel_size, strides=self.strides, @@ -160,7 +162,9 @@ def __init__( ) self.bn3 = layers.BatchNormalization( - axis=BN_AXIS, momentum=self.bn_momentum, name=self.name + "project_bn" + axis=BN_AXIS, + momentum=self.bn_momentum, + name=self.name + "project_bn", ) def build(self, input_shape): diff --git a/keras_cv/layers/object_detection/anchor_generator.py b/keras_cv/layers/object_detection/anchor_generator.py index 47915c108d..df854017f7 100644 --- a/keras_cv/layers/object_detection/anchor_generator.py +++ b/keras_cv/layers/object_detection/anchor_generator.py @@ -87,7 +87,9 @@ def __init__( self.bounding_box_format = bounding_box_format # aspect_ratio is a single 
list that is the same across all levels. sizes, strides = self._format_sizes_and_strides(sizes, strides) - aspect_ratios = self._match_param_structure_to_sizes(aspect_ratios, sizes) + aspect_ratios = self._match_param_structure_to_sizes( + aspect_ratios, sizes + ) scales = self._match_param_structure_to_sizes(scales, sizes) self.anchor_generators = {} @@ -105,7 +107,9 @@ def __init__( @staticmethod def _format_sizes_and_strides(sizes, strides): - result_sizes = AnchorGenerator._ensure_param_is_levels_dict(sizes, "sizes") + result_sizes = AnchorGenerator._ensure_param_is_levels_dict( + sizes, "sizes" + ) result_strides = AnchorGenerator._ensure_param_is_levels_dict( strides, "strides" ) @@ -147,14 +151,17 @@ def _match_param_structure_to_sizes(params, sizes): # return [params] * len(sizes) if not isinstance(sizes, dict): raise ValueError( - "the structure of `sizes` must be a dict, " f"received sizes={sizes}" + "the structure of `sizes` must be a dict, " + f"received sizes={sizes}" ) return tf.nest.map_structure(lambda _: params, sizes) def __call__(self, image=None, image_shape=None): if image is None and image_shape is None: - raise ValueError("AnchorGenerator() requires `images` or `image_shape`.") + raise ValueError( + "AnchorGenerator() requires `images` or `image_shape`." + ) if image is not None: if image.shape.rank != 3: @@ -280,4 +287,6 @@ def __call__(self, image_size): x_max = tf.maximum(tf.minimum(x_max, image_width), 0.0) # [H * W * K, 4] - return tf.cast(tf.concat([y_min, x_min, y_max, x_max], axis=-1), self.dtype) + return tf.cast( + tf.concat([y_min, x_min, y_max, x_max], axis=-1), self.dtype + ) diff --git a/keras_cv/layers/object_detection/box_matcher.py b/keras_cv/layers/object_detection/box_matcher.py index 22f16a0102..0a32cc09c8 100644 --- a/keras_cv/layers/object_detection/box_matcher.py +++ b/keras_cv/layers/object_detection/box_matcher.py @@ -133,8 +133,12 @@ def _match_when_cols_are_empty(): or ignored match). """ with tf.name_scope("empty_boxes"): - matched_columns = tf.zeros([batch_size, num_rows], dtype=tf.int32) - matched_values = -tf.ones([batch_size, num_rows], dtype=tf.int32) + matched_columns = tf.zeros( + [batch_size, num_rows], dtype=tf.int32 + ) + matched_values = -tf.ones( + [batch_size, num_rows], dtype=tf.int32 + ) return matched_columns, matched_values def _match_when_cols_are_non_empty(): @@ -183,11 +187,14 @@ def _match_when_cols_are_non_empty(): # [batch_size, num_rows], for each row (anchor), find the matched # column (groundtruth_box). 
force_matched_columns = tf.argmax( - input=column_to_row_match_mapping, axis=1, output_type=tf.int32 + input=column_to_row_match_mapping, + axis=1, + output_type=tf.int32, ) # [batch_size, num_rows] force_matched_column_mask = tf.cast( - tf.reduce_max(column_to_row_match_mapping, axis=1), tf.bool + tf.reduce_max(column_to_row_match_mapping, axis=1), + tf.bool, ) # [batch_size, num_rows] matched_columns = tf.where( @@ -205,7 +212,8 @@ def _match_when_cols_are_non_empty(): return matched_columns, matched_values num_boxes = ( - similarity_matrix.shape.as_list()[-1] or tf.shape(similarity_matrix)[-1] + similarity_matrix.shape.as_list()[-1] + or tf.shape(similarity_matrix)[-1] ) matched_columns, matched_values = tf.cond( pred=tf.greater(num_boxes, 0), diff --git a/keras_cv/layers/object_detection/box_matcher_test.py b/keras_cv/layers/object_detection/box_matcher_test.py index 7144f60059..021f99a8ef 100644 --- a/keras_cv/layers/object_detection/box_matcher_test.py +++ b/keras_cv/layers/object_detection/box_matcher_test.py @@ -41,7 +41,9 @@ def test_box_matcher_unsorted_thresholds(self): ) def test_box_matcher_unbatched(self): - sim_matrix = tf.constant([[0.04, 0, 0, 0], [0, 0, 1.0, 0]], dtype=tf.float32) + sim_matrix = tf.constant( + [[0.04, 0, 0, 0], [0, 0, 1.0, 0]], dtype=tf.float32 + ) fg_threshold = 0.5 bg_thresh_hi = 0.2 @@ -61,7 +63,9 @@ def test_box_matcher_unbatched(self): self.assertAllEqual(matched_values.numpy(), [-2, 1]) def test_box_matcher_batched(self): - sim_matrix = tf.constant([[[0.04, 0, 0, 0], [0, 0, 1.0, 0]]], dtype=tf.float32) + sim_matrix = tf.constant( + [[[0.04, 0, 0, 0], [0, 0, 1.0, 0]]], dtype=tf.float32 + ) fg_threshold = 0.5 bg_thresh_hi = 0.2 @@ -100,7 +104,9 @@ def test_box_matcher_force_match(self): negative_matches = tf.equal(matched_values, -2) self.assertAllEqual(positive_matches.numpy(), [True, True, True, True]) - self.assertAllEqual(negative_matches.numpy(), [False, False, False, False]) + self.assertAllEqual( + negative_matches.numpy(), [False, False, False, False] + ) # the first anchor cannot be matched to 4th gt box given that is matched to # the last anchor. 
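# Force matching (implemented in box_matcher.py above) guarantees that
# every ground-truth column keeps the row (anchor) that overlaps it most,
# even when that row's own best match pointed elsewhere or fell below the
# foreground threshold. A simplified stand-alone sketch of the idea
# (variable names are illustrative, not the KerasCV API):
import tensorflow as tf

similarity = tf.constant([[0.9, 0.2], [0.6, 0.3], [0.1, 0.25]])  # rows: anchors, cols: gt
matched_cols = tf.argmax(similarity, axis=-1, output_type=tf.int32)     # [0, 0, 1]
best_row_per_col = tf.argmax(similarity, axis=0, output_type=tf.int32)  # [0, 1]
forced_cols = tf.tensor_scatter_nd_update(
    matched_cols,
    best_row_per_col[:, tf.newaxis],
    tf.range(tf.shape(similarity)[1]),
)
# forced_cols == [0, 1, 1]: row 1 is forced onto column 1 so that no
# ground-truth box is left unmatched, the same behaviour the assertion
# below checks on a 4x4 case.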
self.assertAllEqual(match_indices.numpy(), [1, 2, 0, 3]) diff --git a/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py b/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py index 8a4a680bd4..0ca2a0a9ab 100644 --- a/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py +++ b/keras_cv/layers/object_detection/multi_class_non_max_suppression_test.py @@ -24,7 +24,11 @@ def decode_predictions_output_shapes(): predictions_shape = (8, 98208, 4 + classes) predictions = tf.random.stateless_uniform( - shape=predictions_shape, seed=(2, 3), minval=0.0, maxval=1.0, dtype=tf.float32 + shape=predictions_shape, + seed=(2, 3), + minval=0.0, + maxval=1.0, + dtype=tf.float32, ) box_pred = predictions[..., :4] confidence_pred = predictions[..., 4:] @@ -35,7 +39,9 @@ def decode_predictions_output_shapes(): max_detections=100, ) - result = layer(box_prediction=box_pred, confidence_prediction=confidence_pred) + result = layer( + box_prediction=box_pred, confidence_prediction=confidence_pred + ) return result @@ -50,7 +56,9 @@ def test_decode_predictions_output_shapes(self): @unittest.expectedFailure class NmsPredictionDecoderTestWithXLA(tf.test.TestCase): def test_decode_predictions_output_shapes(self): - xla_function = tf.function(decode_predictions_output_shapes, jit_compile=True) + xla_function = tf.function( + decode_predictions_output_shapes, jit_compile=True + ) result = xla_function() self.assertEqual(result["boxes"].shape, [8, 100, 4]) self.assertEqual(result["classes"].shape, [8, 100]) @@ -66,7 +74,9 @@ def tearDown(self): # @unittest.expectedFailure def test_decode_predictions_output_shapes(self): - xla_function = tf.function(decode_predictions_output_shapes, jit_compile=True) + xla_function = tf.function( + decode_predictions_output_shapes, jit_compile=True + ) result = xla_function() self.assertEqual(result["boxes"].shape, [8, 100, 4]) self.assertEqual(result["classes"].shape, [8, 100]) diff --git a/keras_cv/layers/object_detection/retina_net_label_encoder.py b/keras_cv/layers/object_detection/retina_net_label_encoder.py index 769731de6c..a9a3d73aaa 100644 --- a/keras_cv/layers/object_detection/retina_net_label_encoder.py +++ b/keras_cv/layers/object_detection/retina_net_label_encoder.py @@ -110,7 +110,9 @@ def _encode_sample(self, box_labels, anchor_boxes): matched_vals = matched_vals[..., tf.newaxis] positive_mask = tf.cast(tf.math.equal(matched_vals, 1), self.dtype) ignore_mask = tf.cast(tf.math.equal(matched_vals, -2), self.dtype) - matched_gt_boxes = target_gather._target_gather(gt_boxes, matched_gt_idx) + matched_gt_boxes = target_gather._target_gather( + gt_boxes, matched_gt_idx + ) box_target = bounding_box._encode_box_to_deltas( anchors=anchor_boxes, boxes=matched_gt_boxes, @@ -118,11 +120,17 @@ def _encode_sample(self, box_labels, anchor_boxes): box_format="xywh", variance=self.box_variance, ) - matched_gt_cls_ids = target_gather._target_gather(gt_classes, matched_gt_idx) + matched_gt_cls_ids = target_gather._target_gather( + gt_classes, matched_gt_idx + ) + cls_target = tf.where( + tf.not_equal(positive_mask, 1.0), + self.background_class, + matched_gt_cls_ids, + ) cls_target = tf.where( - tf.not_equal(positive_mask, 1.0), self.background_class, matched_gt_cls_ids + tf.equal(ignore_mask, 1.0), self.ignore_class, cls_target ) - cls_target = tf.where(tf.equal(ignore_mask, 1.0), self.ignore_class, cls_target) label = tf.concat([box_target, cls_target], axis=-1) # In the case that a box in the corner of an image matches with an all 
-1 box @@ -131,7 +139,9 @@ def _encode_sample(self, box_labels, anchor_boxes): # training. The unit test passing all -1s to the label encoder ensures that we # properly handle this edge-case. label = tf.where( - tf.expand_dims(tf.math.reduce_any(tf.math.is_nan(label), axis=-1), axis=-1), + tf.expand_dims( + tf.math.reduce_any(tf.math.is_nan(label), axis=-1), axis=-1 + ), self.ignore_class, label, ) @@ -192,7 +202,10 @@ def call(self, images, box_labels): result = self._encode_sample(box_labels, anchor_boxes) result = bounding_box.convert_format( - result, source="xywh", target=self.bounding_box_format, images=images + result, + source="xywh", + target=self.bounding_box_format, + images=images, ) encoded_box_targets = result["boxes"] class_targets = result["classes"] diff --git a/keras_cv/layers/object_detection/retina_net_label_encoder_test.py b/keras_cv/layers/object_detection/retina_net_label_encoder_test.py index c356e95c42..300fa57604 100644 --- a/keras_cv/layers/object_detection/retina_net_label_encoder_test.py +++ b/keras_cv/layers/object_detection/retina_net_label_encoder_test.py @@ -95,7 +95,10 @@ def test_ragged_encoding(self): ] ) classes = tf.ragged.stack( - [tf.constant([[1], [1]], tf.float32), tf.constant([[1]], tf.float32)] + [ + tf.constant([[1], [1]], tf.float32), + tf.constant([[1]], tf.float32), + ] ) strides = [2**i for i in range(3, 8)] scales = [2**x for x in [0, 1 / 3, 2 / 3]] diff --git a/keras_cv/layers/object_detection/roi_align.py b/keras_cv/layers/object_detection/roi_align.py index 0d7222c2ac..50183186a7 100644 --- a/keras_cv/layers/object_detection/roi_align.py +++ b/keras_cv/layers/object_detection/roi_align.py @@ -78,7 +78,10 @@ def _feature_bilinear_interpolation( def _compute_grid_positions( - boxes: tf.Tensor, boundaries: tf.Tensor, output_size: int, sample_offset: float + boxes: tf.Tensor, + boundaries: tf.Tensor, + output_size: int, + sample_offset: float, ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor, tf.Tensor]: """ Computes the grid position w.r.t. the corresponding feature map. @@ -123,10 +126,18 @@ def _compute_grid_positions( box_grid_x0 = tf.maximum(tf.cast(0.0, dtype=box_grid_x0.dtype), box_grid_x0) box_grid_y0 = tf.maximum(tf.cast(0.0, dtype=box_grid_y0.dtype), box_grid_y0) - box_grid_x0 = tf.minimum(box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1)) - box_grid_x1 = tf.minimum(box_grid_x0 + 1, tf.expand_dims(boundaries[:, :, 1], -1)) - box_grid_y0 = tf.minimum(box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1)) - box_grid_y1 = tf.minimum(box_grid_y0 + 1, tf.expand_dims(boundaries[:, :, 0], -1)) + box_grid_x0 = tf.minimum( + box_grid_x0, tf.expand_dims(boundaries[:, :, 1], -1) + ) + box_grid_x1 = tf.minimum( + box_grid_x0 + 1, tf.expand_dims(boundaries[:, :, 1], -1) + ) + box_grid_y0 = tf.minimum( + box_grid_y0, tf.expand_dims(boundaries[:, :, 0], -1) + ) + box_grid_y1 = tf.minimum( + box_grid_y0 + 1, tf.expand_dims(boundaries[:, :, 0], -1) + ) box_gridx0x1 = tf.stack([box_grid_x0, box_grid_x1], axis=-1) box_gridy0y1 = tf.stack([box_grid_y0, box_grid_y1], axis=-1) @@ -209,7 +220,8 @@ def multilevel_crop_and_resize( # Calculate height_l * width_l for each level. level_dim_sizes = [ - feature_widths[i] * feature_heights[i] for i in range(len(feature_widths)) + feature_widths[i] * feature_heights[i] + for i in range(len(feature_widths)) ] # level_dim_offsets is accumulated sum of level_dim_size. level_dim_offsets = [0] @@ -229,7 +241,8 @@ def multilevel_crop_and_resize( # following the FPN paper to divide by 224. 
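# The level assignment below is the FPN heuristic
# level = floor(log2(sqrt(box_area) / 224)) + 4, with the result kept
# within the pyramid's available levels. A small worked example (values
# are illustrative):
import tensorflow as tf

areas_sqrt = tf.constant([150.0, 224.0, 500.0])
levels = tf.cast(
    tf.math.floordiv(
        tf.math.log(tf.math.divide_no_nan(areas_sqrt, 224.0)),
        tf.math.log(2.0),
    )
    + 4.0,
    tf.int32,
)
# levels == [3, 4, 5]: a box whose sqrt-area is 224 maps to level 4,
# smaller boxes to finer levels, larger boxes to coarser ones.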
levels = tf.cast( tf.math.floordiv( - tf.math.log(tf.math.divide_no_nan(areas_sqrt, 224.0)), tf.math.log(2.0) + tf.math.log(tf.math.divide_no_nan(areas_sqrt, 224.0)), + tf.math.log(2.0), ) + 4.0, dtype=tf.int32, @@ -239,7 +252,8 @@ def multilevel_crop_and_resize( # Projects box location and sizes to corresponding feature levels. scale_to_level = tf.cast( - tf.pow(tf.constant(2.0), tf.cast(levels, tf.float32)), dtype=boxes.dtype + tf.pow(tf.constant(2.0), tf.cast(levels, tf.float32)), + dtype=boxes.dtype, ) boxes /= tf.expand_dims(scale_to_level, axis=2) box_width /= scale_to_level @@ -260,11 +274,15 @@ def multilevel_crop_and_resize( tf.concat( [ tf.expand_dims( - [[tf.cast(max_feature_height, tf.float32)]] / level_strides - 1, + [[tf.cast(max_feature_height, tf.float32)]] + / level_strides + - 1, axis=-1, ), tf.expand_dims( - [[tf.cast(max_feature_width, tf.float32)]] / level_strides - 1, + [[tf.cast(max_feature_width, tf.float32)]] + / level_strides + - 1, axis=-1, ), ], @@ -274,9 +292,12 @@ def multilevel_crop_and_resize( ) # Compute grid positions. - kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = _compute_grid_positions( - boxes, boundary, output_size, sample_offset - ) + ( + kernel_y, + kernel_x, + box_gridy0y1, + box_gridx0x1, + ) = _compute_grid_positions(boxes, boundary, output_size, sample_offset) x_indices = tf.cast( tf.reshape(box_gridx0x1, [batch_size, num_boxes, output_size * 2]), @@ -288,19 +309,23 @@ def multilevel_crop_and_resize( ) batch_size_offset = tf.tile( - tf.reshape(tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]), + tf.reshape( + tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1] + ), [1, num_boxes, output_size * 2, output_size * 2], ) # Get level offset for each box. Each box belongs to one level. levels_offset = tf.tile( tf.reshape( - tf.gather(level_dim_offsets, levels), [batch_size, num_boxes, 1, 1] + tf.gather(level_dim_offsets, levels), + [batch_size, num_boxes, 1, 1], ), [1, 1, output_size * 2, output_size * 2], ) y_indices_offset = tf.tile( tf.reshape( - y_indices * tf.expand_dims(tf.gather(height_dim_sizes, levels), -1), + y_indices + * tf.expand_dims(tf.gather(height_dim_sizes, levels), -1), [batch_size, num_boxes, output_size * 2, 1], ), [1, 1, 1, output_size * 2], @@ -310,7 +335,10 @@ def multilevel_crop_and_resize( [1, 1, output_size * 2, 1], ) indices = tf.reshape( - batch_size_offset + levels_offset + y_indices_offset + x_indices_offset, + batch_size_offset + + levels_offset + + y_indices_offset + + x_indices_offset, [-1], ) @@ -318,7 +346,13 @@ def multilevel_crop_and_resize( # performance. features_per_box = tf.reshape( tf.gather(features_r2, indices), - [batch_size, num_boxes, output_size * 2, output_size * 2, num_filters], + [ + batch_size, + num_boxes, + output_size * 2, + output_size * 2, + num_filters, + ], ) # Bilinear interpolation. @@ -336,7 +370,11 @@ class _ROIAligner(tf.keras.layers.Layer): """Performs ROIAlign for the second stage processing.""" def __init__( - self, bounding_box_format, target_size=7, sample_offset: float = 0.5, **kwargs + self, + bounding_box_format, + target_size=7, + sample_offset: float = 0.5, + **kwargs ): """ Generates ROI Aligner. @@ -375,7 +413,9 @@ def call( [batch_size, num_boxes, crop_size, crop_size, num_filters]. 
""" boxes = bounding_box.convert_format( - boxes, source=self._config_dict["bounding_box_format"], target="yxyx" + boxes, + source=self._config_dict["bounding_box_format"], + target="yxyx", ) roi_features = multilevel_crop_and_resize( features, diff --git a/keras_cv/layers/object_detection/roi_generator_test.py b/keras_cv/layers/object_detection/roi_generator_test.py index 4340b25666..0ba74d6596 100644 --- a/keras_cv/layers/object_detection/roi_generator_test.py +++ b/keras_cv/layers/object_detection/roi_generator_test.py @@ -22,7 +22,12 @@ def test_single_tensor(self): roi_generator = ROIGenerator("xyxy", nms_iou_threshold_train=0.96) rpn_boxes = tf.constant( [ - [[0, 0, 10, 10], [0.1, 0.1, 9.9, 9.9], [5, 5, 10, 10], [2, 2, 8, 8]], + [ + [0, 0, 10, 10], + [0.1, 0.1, 9.9, 9.9], + [5, 5, 10, 10], + [2, 2, 8, 8], + ], ] ) expected_rois = tf.gather(rpn_boxes, [[1, 3, 2]], batch_dims=1) @@ -35,7 +40,9 @@ def test_single_tensor(self): # selecting the 1st, then 3rd, then 2nd as they don't overlap # 0th box overlaps with 1st box expected_roi_scores = tf.gather(rpn_scores, [[1, 3, 2]], batch_dims=1) - expected_roi_scores = tf.concat([expected_roi_scores, tf.zeros([1, 1])], axis=1) + expected_roi_scores = tf.concat( + [expected_roi_scores, tf.zeros([1, 1])], axis=1 + ) rois, roi_scores = roi_generator(rpn_boxes, rpn_scores, training=True) self.assertAllClose(expected_rois, rois) self.assertAllClose(expected_roi_scores, roi_scores) @@ -44,7 +51,12 @@ def test_single_level_single_batch_roi_ignore_box(self): roi_generator = ROIGenerator("xyxy", nms_iou_threshold_train=0.96) rpn_boxes = tf.constant( [ - [[0, 0, 10, 10], [0.1, 0.1, 9.9, 9.9], [5, 5, 10, 10], [2, 2, 8, 8]], + [ + [0, 0, 10, 10], + [0.1, 0.1, 9.9, 9.9], + [5, 5, 10, 10], + [2, 2, 8, 8], + ], ] ) expected_rois = tf.gather(rpn_boxes, [[1, 3, 2]], batch_dims=1) @@ -58,7 +70,9 @@ def test_single_level_single_batch_roi_ignore_box(self): # selecting the 1st, then 3rd, then 2nd as they don't overlap # 0th box overlaps with 1st box expected_roi_scores = tf.gather(rpn_scores, [[1, 3, 2]], batch_dims=1) - expected_roi_scores = tf.concat([expected_roi_scores, tf.zeros([1, 1])], axis=1) + expected_roi_scores = tf.concat( + [expected_roi_scores, tf.zeros([1, 1])], axis=1 + ) rpn_scores = {2: rpn_scores} rois, roi_scores = roi_generator(rpn_boxes, rpn_scores, training=True) self.assertAllClose(expected_rois, rois) @@ -70,7 +84,12 @@ def test_single_level_single_batch_roi_all_box(self): roi_generator = ROIGenerator("xyxy", nms_iou_threshold_train=0.97) rpn_boxes = tf.constant( [ - [[0, 0, 10, 10], [0.1, 0.1, 9.9, 9.9], [5, 5, 10, 10], [2, 2, 8, 8]], + [ + [0, 0, 10, 10], + [0.1, 0.1, 9.9, 9.9], + [5, 5, 10, 10], + [2, 2, 8, 8], + ], ] ) expected_rois = tf.gather(rpn_boxes, [[1, 0, 3, 2]], batch_dims=1) @@ -81,7 +100,9 @@ def test_single_level_single_batch_roi_all_box(self): ] ) # selecting the 1st, then 0th, then 3rd, then 2nd as they don't overlap - expected_roi_scores = tf.gather(rpn_scores, [[1, 0, 3, 2]], batch_dims=1) + expected_roi_scores = tf.gather( + rpn_scores, [[1, 0, 3, 2]], batch_dims=1 + ) rpn_scores = {2: rpn_scores} rois, roi_scores = roi_generator(rpn_boxes, rpn_scores, training=True) self.assertAllClose(expected_rois, rois) @@ -91,11 +112,23 @@ def test_single_level_propose_rois(self): roi_generator = ROIGenerator("xyxy") rpn_boxes = tf.constant( [ - [[0, 0, 10, 10], [0.1, 0.1, 9.9, 9.9], [5, 5, 10, 10], [2, 2, 8, 8]], - [[2, 2, 4, 4], [3, 3, 6, 6], [3.1, 3.1, 6.1, 6.1], [1, 1, 8, 8]], + [ + [0, 0, 10, 10], + [0.1, 0.1, 9.9, 9.9], + 
[5, 5, 10, 10], + [2, 2, 8, 8], + ], + [ + [2, 2, 4, 4], + [3, 3, 6, 6], + [3.1, 3.1, 6.1, 6.1], + [1, 1, 8, 8], + ], ] ) - expected_rois = tf.gather(rpn_boxes, [[1, 3, 2], [1, 3, 0]], batch_dims=1) + expected_rois = tf.gather( + rpn_boxes, [[1, 3, 2], [1, 3, 0]], batch_dims=1 + ) expected_rois = tf.concat([expected_rois, tf.zeros([2, 1, 4])], axis=1) rpn_boxes = {2: rpn_boxes} rpn_scores = tf.constant([[0.6, 0.9, 0.2, 0.3], [0.1, 0.8, 0.3, 0.5]]) @@ -104,7 +137,9 @@ def test_single_level_propose_rois(self): expected_roi_scores = tf.gather( rpn_scores, [[1, 3, 2], [1, 3, 0]], batch_dims=1 ) - expected_roi_scores = tf.concat([expected_roi_scores, tf.zeros([2, 1])], axis=1) + expected_roi_scores = tf.concat( + [expected_roi_scores, tf.zeros([2, 1])], axis=1 + ) rpn_scores = {2: rpn_scores} rois, roi_scores = roi_generator(rpn_boxes, rpn_scores, training=True) self.assertAllClose(expected_rois, rois) @@ -114,8 +149,18 @@ def test_two_level_single_batch_propose_rois_ignore_box(self): roi_generator = ROIGenerator("xyxy") rpn_boxes = tf.constant( [ - [[0, 0, 10, 10], [0.1, 0.1, 9.9, 9.9], [5, 5, 10, 10], [2, 2, 8, 8]], - [[2, 2, 4, 4], [3, 3, 6, 6], [3.1, 3.1, 6.1, 6.1], [1, 1, 8, 8]], + [ + [0, 0, 10, 10], + [0.1, 0.1, 9.9, 9.9], + [5, 5, 10, 10], + [2, 2, 8, 8], + ], + [ + [2, 2, 4, 4], + [3, 3, 6, 6], + [3.1, 3.1, 6.1, 6.1], + [1, 1, 8, 8], + ], ] ) expected_rois = tf.constant( @@ -157,8 +202,18 @@ def test_two_level_single_batch_propose_rois_all_box(self): roi_generator = ROIGenerator("xyxy", nms_iou_threshold_train=0.99) rpn_boxes = tf.constant( [ - [[0, 0, 10, 10], [0.1, 0.1, 9.9, 9.9], [5, 5, 10, 10], [2, 2, 8, 8]], - [[2, 2, 4, 4], [3, 3, 6, 6], [3.1, 3.1, 6.1, 6.1], [1, 1, 8, 8]], + [ + [0, 0, 10, 10], + [0.1, 0.1, 9.9, 9.9], + [5, 5, 10, 10], + [2, 2, 8, 8], + ], + [ + [2, 2, 4, 4], + [3, 3, 6, 6], + [3.1, 3.1, 6.1, 6.1], + [1, 1, 8, 8], + ], ] ) expected_rois = tf.constant( diff --git a/keras_cv/layers/object_detection/roi_pool.py b/keras_cv/layers/object_detection/roi_pool.py index bd36218182..23402d4eb8 100644 --- a/keras_cv/layers/object_detection/roi_pool.py +++ b/keras_cv/layers/object_detection/roi_pool.py @@ -61,7 +61,11 @@ def __init__( raise ValueError( f"Expected `target_size` to be size 2, got {len(target_size)}" ) - if image_shape[0] is None or image_shape[1] is None or image_shape[2] is None: + if ( + image_shape[0] is None + or image_shape[1] is None + or image_shape[2] is None + ): raise ValueError( f"`image_shape` cannot have dynamic shape, got {image_shape}" ) @@ -122,12 +126,16 @@ def _pool_single_sample(self, args): height_end = tf.cast(height_end, tf.int32) # if feature_map shape smaller than roi, h_step would be 0 # in this case the result will be feature_map[0, 0, ...] 
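# At its core the pooling below is block-wise max pooling of the region
# covered by the ROI. For the unquantized full-image case this reduces to
# a plain reshape-and-reduce, a stand-alone sketch of which (matching the
# values expected in test_no_quantize further below) is:
import tensorflow as tf

feature_map = tf.reshape(tf.range(64), [8, 8])
# Split the 8x8 map into a 2x2 grid of 4x4 blocks and take the max of each.
blocks = tf.reshape(feature_map, [2, 4, 2, 4])
pooled = tf.reduce_max(blocks, axis=[1, 3])
# pooled == [[27, 31], [59, 63]]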
- height_end = height_start + tf.maximum(1, height_end - height_start) + height_end = height_start + tf.maximum( + 1, height_end - height_start + ) width_start = x_start + j * w_step width_end = width_start + w_step width_start = tf.cast(width_start, tf.int32) width_end = tf.cast(width_end, tf.int32) - width_end = width_start + tf.maximum(1, width_end - width_start) + width_end = width_start + tf.maximum( + 1, width_end - width_start + ) # [h_step, w_step, C] region = feature_map[ height_start:height_end, width_start:width_end, : @@ -135,7 +143,8 @@ def _pool_single_sample(self, args): # target_height * target_width * [C] regions.append(tf.reduce_max(region, axis=[0, 1])) regions = tf.reshape( - tf.stack(regions), [self.target_height, self.target_width, channel] + tf.stack(regions), + [self.target_height, self.target_width, channel], ) return regions diff --git a/keras_cv/layers/object_detection/roi_pool_test.py b/keras_cv/layers/object_detection/roi_pool_test.py index cf04bb7fac..90d6011935 100644 --- a/keras_cv/layers/object_detection/roi_pool_test.py +++ b/keras_cv/layers/object_detection/roi_pool_test.py @@ -22,7 +22,9 @@ def test_no_quantize(self): roi_pooler = ROIPooler( "rel_yxyx", target_size=[2, 2], image_shape=[224, 224, 3] ) - feature_map = tf.expand_dims(tf.reshape(tf.range(64), [8, 8, 1]), axis=0) + feature_map = tf.expand_dims( + tf.reshape(tf.range(64), [8, 8, 1]), axis=0 + ) rois = tf.reshape(tf.constant([0.0, 0.0, 1.0, 1.0]), [1, 1, 4]) pooled_feature_map = roi_pooler(feature_map, rois) # the maximum value would be at bottom-right at each block, roi sharded into 2x2 blocks @@ -36,12 +38,18 @@ def test_no_quantize(self): # | 48, 49, 50, 51 | 52, 53, 54, 55 | # | 56, 57, 58, 59(max) | 60, 61, 62, 63(max) | # -------------------------------------------- - expected_feature_map = tf.reshape(tf.constant([27, 31, 59, 63]), [1, 2, 2, 1]) + expected_feature_map = tf.reshape( + tf.constant([27, 31, 59, 63]), [1, 2, 2, 1] + ) self.assertAllClose(expected_feature_map, pooled_feature_map) def test_roi_quantize_y(self): - roi_pooler = ROIPooler("yxyx", target_size=[2, 2], image_shape=[224, 224, 3]) - feature_map = tf.expand_dims(tf.reshape(tf.range(64), [8, 8, 1]), axis=0) + roi_pooler = ROIPooler( + "yxyx", target_size=[2, 2], image_shape=[224, 224, 3] + ) + feature_map = tf.expand_dims( + tf.reshape(tf.range(64), [8, 8, 1]), axis=0 + ) rois = tf.reshape(tf.constant([0.0, 0.0, 224, 220]), [1, 1, 4]) pooled_feature_map = roi_pooler(feature_map, rois) # the maximum value would be at bottom-right at each block, roi sharded into 2x2 blocks @@ -55,12 +63,18 @@ def test_roi_quantize_y(self): # | 48, 49, 50 | 51, 52, 53, 54 | 55 (removed) # | 56, 57, 58(max) | 59, 60, 61, 62(max) | 63 (removed) # -------------------------------------------- - expected_feature_map = tf.reshape(tf.constant([26, 30, 58, 62]), [1, 2, 2, 1]) + expected_feature_map = tf.reshape( + tf.constant([26, 30, 58, 62]), [1, 2, 2, 1] + ) self.assertAllClose(expected_feature_map, pooled_feature_map) def test_roi_quantize_x(self): - roi_pooler = ROIPooler("yxyx", target_size=[2, 2], image_shape=[224, 224, 3]) - feature_map = tf.expand_dims(tf.reshape(tf.range(64), [8, 8, 1]), axis=0) + roi_pooler = ROIPooler( + "yxyx", target_size=[2, 2], image_shape=[224, 224, 3] + ) + feature_map = tf.expand_dims( + tf.reshape(tf.range(64), [8, 8, 1]), axis=0 + ) rois = tf.reshape(tf.constant([0.0, 0.0, 220, 224]), [1, 1, 4]) pooled_feature_map = roi_pooler(feature_map, rois) # the maximum value would be at bottom-right at each block, roi 
sharded into 2x2 blocks @@ -73,12 +87,18 @@ def test_roi_quantize_x(self): # | 40, 41, 42, 43 | 44, 45, 46, 47 | # | 48, 49, 50, 51(max) | 52, 53, 54, 55(max) | # -------------------------------------------- - expected_feature_map = tf.reshape(tf.constant([19, 23, 51, 55]), [1, 2, 2, 1]) + expected_feature_map = tf.reshape( + tf.constant([19, 23, 51, 55]), [1, 2, 2, 1] + ) self.assertAllClose(expected_feature_map, pooled_feature_map) def test_roi_quantize_h(self): - roi_pooler = ROIPooler("yxyx", target_size=[3, 2], image_shape=[224, 224, 3]) - feature_map = tf.expand_dims(tf.reshape(tf.range(64), [8, 8, 1]), axis=0) + roi_pooler = ROIPooler( + "yxyx", target_size=[3, 2], image_shape=[224, 224, 3] + ) + feature_map = tf.expand_dims( + tf.reshape(tf.range(64), [8, 8, 1]), axis=0 + ) rois = tf.reshape(tf.constant([0.0, 0.0, 224, 224]), [1, 1, 4]) pooled_feature_map = roi_pooler(feature_map, rois) # the maximum value would be at bottom-right at each block, roi sharded into 3x2 blocks @@ -99,8 +119,12 @@ def test_roi_quantize_h(self): self.assertAllClose(expected_feature_map, pooled_feature_map) def test_roi_quantize_w(self): - roi_pooler = ROIPooler("yxyx", target_size=[2, 3], image_shape=[224, 224, 3]) - feature_map = tf.expand_dims(tf.reshape(tf.range(64), [8, 8, 1]), axis=0) + roi_pooler = ROIPooler( + "yxyx", target_size=[2, 3], image_shape=[224, 224, 3] + ) + feature_map = tf.expand_dims( + tf.reshape(tf.range(64), [8, 8, 1]), axis=0 + ) rois = tf.reshape(tf.constant([0.0, 0.0, 224, 224]), [1, 1, 4]) pooled_feature_map = roi_pooler(feature_map, rois) # the maximum value would be at bottom-right at each block, roi sharded into 2x3 blocks @@ -120,8 +144,12 @@ def test_roi_quantize_w(self): self.assertAllClose(expected_feature_map, pooled_feature_map) def test_roi_feature_map_height_smaller_than_roi(self): - roi_pooler = ROIPooler("yxyx", target_size=[6, 2], image_shape=[224, 224, 3]) - feature_map = tf.expand_dims(tf.reshape(tf.range(16), [4, 4, 1]), axis=0) + roi_pooler = ROIPooler( + "yxyx", target_size=[6, 2], image_shape=[224, 224, 3] + ) + feature_map = tf.expand_dims( + tf.reshape(tf.range(16), [4, 4, 1]), axis=0 + ) rois = tf.reshape(tf.constant([0.0, 0.0, 224, 224]), [1, 1, 4]) pooled_feature_map = roi_pooler(feature_map, rois) # | 0, 1(max) | 2, 3(max) | @@ -137,8 +165,12 @@ def test_roi_feature_map_height_smaller_than_roi(self): self.assertAllClose(expected_feature_map, pooled_feature_map) def test_roi_feature_map_width_smaller_than_roi(self): - roi_pooler = ROIPooler("yxyx", target_size=[2, 6], image_shape=[224, 224, 3]) - feature_map = tf.expand_dims(tf.reshape(tf.range(16), [4, 4, 1]), axis=0) + roi_pooler = ROIPooler( + "yxyx", target_size=[2, 6], image_shape=[224, 224, 3] + ) + feature_map = tf.expand_dims( + tf.reshape(tf.range(16), [4, 4, 1]), axis=0 + ) rois = tf.reshape(tf.constant([0.0, 0.0, 224, 224]), [1, 1, 4]) pooled_feature_map = roi_pooler(feature_map, rois) # | 0 | 1 | 2 | 3 | @@ -148,13 +180,18 @@ def test_roi_feature_map_width_smaller_than_roi(self): # | 12(max) | 13(max) | 14(max) | 15(max) | # -------------------------------------------- expected_feature_map = tf.reshape( - tf.constant([4, 4, 5, 6, 6, 7, 12, 12, 13, 14, 14, 15]), [1, 2, 6, 1] + tf.constant([4, 4, 5, 6, 6, 7, 12, 12, 13, 14, 14, 15]), + [1, 2, 6, 1], ) self.assertAllClose(expected_feature_map, pooled_feature_map) def test_roi_empty(self): - roi_pooler = ROIPooler("yxyx", target_size=[2, 2], image_shape=[224, 224, 3]) - feature_map = tf.expand_dims(tf.reshape(tf.range(1, 65), [8, 8, 1]), 
axis=0) + roi_pooler = ROIPooler( + "yxyx", target_size=[2, 2], image_shape=[224, 224, 3] + ) + feature_map = tf.expand_dims( + tf.reshape(tf.range(1, 65), [8, 8, 1]), axis=0 + ) rois = tf.reshape(tf.constant([0.0, 0.0, 0.0, 0.0]), [1, 1, 4]) pooled_feature_map = roi_pooler(feature_map, rois) # all outputs should be top-left pixel @@ -162,4 +199,6 @@ def test_roi_empty(self): def test_invalid_image_shape(self): with self.assertRaisesRegex(ValueError, "dynamic shape"): - _ = ROIPooler("rel_yxyx", target_size=[2, 2], image_shape=[None, 224, 3]) + _ = ROIPooler( + "rel_yxyx", target_size=[2, 2], image_shape=[None, 224, 3] + ) diff --git a/keras_cv/layers/object_detection/roi_sampler.py b/keras_cv/layers/object_detection/roi_sampler.py index 310bc9bc90..677aed8a94 100644 --- a/keras_cv/layers/object_detection/roi_sampler.py +++ b/keras_cv/layers/object_detection/roi_sampler.py @@ -101,7 +101,9 @@ def call( rois = tf.concat([rois, gt_boxes], axis=1) num_rois = rois.get_shape().as_list()[1] if num_rois is None: - raise ValueError(f"`rois` must have static shape, got {rois.get_shape()}") + raise ValueError( + f"`rois` must have static shape, got {rois.get_shape()}" + ) if num_rois < self.num_sampled_rois: raise ValueError( f"num_rois must be less than `num_sampled_rois` ({self.num_sampled_rois}), got {num_rois}" @@ -128,9 +130,13 @@ def call( tf.reduce_sum(tf.cast(negative_matches, tf.float32), axis=-1) ) # [batch_size, num_rois, 1] - background_mask = tf.expand_dims(tf.logical_not(positive_matches), axis=-1) + background_mask = tf.expand_dims( + tf.logical_not(positive_matches), axis=-1 + ) # [batch_size, num_rois, 1] - matched_gt_classes = target_gather._target_gather(gt_classes, matched_gt_cols) + matched_gt_classes = target_gather._target_gather( + gt_classes, matched_gt_cols + ) # also set all background matches to `background_class` matched_gt_classes = tf.where( background_mask, @@ -141,7 +147,9 @@ def call( matched_gt_classes, ) # [batch_size, num_rois, 4] - matched_gt_boxes = target_gather._target_gather(gt_boxes, matched_gt_cols) + matched_gt_boxes = target_gather._target_gather( + gt_boxes, matched_gt_cols + ) encoded_matched_gt_boxes = bounding_box._encode_box_to_deltas( anchors=rois, boxes=matched_gt_boxes, @@ -151,7 +159,9 @@ def call( ) # also set all background matches to 0 coordinates encoded_matched_gt_boxes = tf.where( - background_mask, tf.zeros_like(matched_gt_boxes), encoded_matched_gt_boxes + background_mask, + tf.zeros_like(matched_gt_boxes), + encoded_matched_gt_boxes, ) # [batch_size, num_rois] sampled_indicators = sampling.balanced_sample( @@ -177,7 +187,8 @@ def call( # [batch_size, num_sampled_rois, 1] # all negative samples will be ignored in regression sampled_box_weights = target_gather._target_gather( - tf.cast(positive_matches[..., tf.newaxis], gt_boxes.dtype), sampled_indices + tf.cast(positive_matches[..., tf.newaxis], gt_boxes.dtype), + sampled_indices, ) # [batch_size, num_sampled_rois, 1] sampled_indicators = sampled_indicators[..., tf.newaxis] diff --git a/keras_cv/layers/object_detection/roi_sampler_test.py b/keras_cv/layers/object_detection/roi_sampler_test.py index 45e04ae390..43332fbc07 100644 --- a/keras_cv/layers/object_detection/roi_sampler_test.py +++ b/keras_cv/layers/object_detection/roi_sampler_test.py @@ -29,7 +29,12 @@ def test_roi_sampler(self): append_gt_boxes=False, ) rois = tf.constant( - [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10], [7.5, 7.5, 12.5, 12.5]] + [ + [0, 0, 5, 5], + [2.5, 2.5, 7.5, 7.5], + [5, 5, 10, 10], + [7.5, 7.5, 
12.5, 12.5], + ] ) rois = rois[tf.newaxis, ...] # the 3rd box will generate 0 IOUs and not sampled. @@ -44,7 +49,9 @@ def test_roi_sampler(self): ) # given we only choose 1 positive sample, and `append_labesl` is False, # only the 2nd ROI is chosen. - expected_gt_boxes = tf.constant([[0.0, 0.0, 0, 0.0], [0.0, 0.0, 0, 0.0]]) + expected_gt_boxes = tf.constant( + [[0.0, 0.0, 0, 0.0], [0.0, 0.0, 0, 0.0]] + ) expected_gt_boxes = expected_gt_boxes[tf.newaxis, ...] # only the 2nd ROI is chosen, and the negative ROI is mapped to 0. expected_gt_classes = tf.constant([[10], [0]], dtype=tf.int32) @@ -53,7 +60,8 @@ def test_roi_sampler(self): tf.reduce_max(expected_gt_boxes), tf.reduce_max(sampled_gt_boxes) ) self.assertAllClose( - tf.reduce_min(expected_gt_classes), tf.reduce_min(sampled_gt_classes) + tf.reduce_min(expected_gt_classes), + tf.reduce_min(sampled_gt_classes), ) def test_roi_sampler_small_threshold(self): @@ -66,7 +74,12 @@ def test_roi_sampler_small_threshold(self): append_gt_boxes=False, ) rois = tf.constant( - [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10], [7.5, 7.5, 12.5, 12.5]] + [ + [0, 0, 5, 5], + [2.5, 2.5, 7.5, 7.5], + [5, 5, 10, 10], + [7.5, 7.5, 12.5, 12.5], + ] ) rois = rois[tf.newaxis, ...] # the 3rd box will generate 0 IOUs and not sampled. @@ -100,7 +113,8 @@ def test_roi_sampler_small_threshold(self): tf.reduce_max(expected_rois, 1), tf.reduce_max(sampled_rois, 1) ) self.assertAllClose( - tf.reduce_max(expected_gt_boxes, 1), tf.reduce_max(sampled_gt_boxes, 1) + tf.reduce_max(expected_gt_boxes, 1), + tf.reduce_max(sampled_gt_boxes, 1), ) self.assertAllClose(expected_gt_classes, sampled_gt_classes) @@ -115,7 +129,12 @@ def test_roi_sampler_large_threshold(self): append_gt_boxes=False, ) rois = tf.constant( - [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10], [7.5, 7.5, 12.5, 12.5]] + [ + [0, 0, 5, 5], + [2.5, 2.5, 7.5, 7.5], + [5, 5, 10, 10], + [7.5, 7.5, 12.5, 12.5], + ] ) rois = rois[tf.newaxis, ...] # the 3rd box will generate 0 IOUs and not sampled. @@ -149,7 +168,12 @@ def test_roi_sampler_large_threshold_custom_bg_class(self): append_gt_boxes=False, ) rois = tf.constant( - [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10], [7.5, 7.5, 12.5, 12.5]] + [ + [0, 0, 5, 5], + [2.5, 2.5, 7.5, 7.5], + [5, 5, 10, 10], + [7.5, 7.5, 12.5, 12.5], + ] ) rois = rois[tf.newaxis, ...] # the 3rd box will generate 0 IOUs and not sampled. @@ -182,7 +206,12 @@ def test_roi_sampler_large_threshold_append_gt_boxes(self): append_gt_boxes=True, ) rois = tf.constant( - [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10], [7.5, 7.5, 12.5, 12.5]] + [ + [0, 0, 5, 5], + [2.5, 2.5, 7.5, 7.5], + [5, 5, 10, 10], + [7.5, 7.5, 12.5, 12.5], + ] ) rois = rois[tf.newaxis, ...] # the 3rd box will generate 0 IOUs and not sampled. @@ -213,7 +242,12 @@ def test_roi_sampler_large_num_sampled_rois(self): append_gt_boxes=True, ) rois = tf.constant( - [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10], [7.5, 7.5, 12.5, 12.5]] + [ + [0, 0, 5, 5], + [2.5, 2.5, 7.5, 7.5], + [5, 5, 10, 10], + [7.5, 7.5, 12.5, 12.5], + ] ) rois = rois[tf.newaxis, ...] # the 3rd box will generate 0 IOUs and not sampled. 
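# Both the ROI sampler above and the RPN label encoder in the next file
# turn matched ground-truth boxes into regression targets via
# bounding_box._encode_box_to_deltas. The conventional anchor-relative
# parameterization behind that call looks roughly like the sketch below
# (center-x/center-y/width/height inputs and the default variance are
# assumptions for illustration; the exact ordering and variance handling
# inside KerasCV may differ):
import tensorflow as tf

def encode_deltas_sketch(anchors_cxcywh, boxes_cxcywh, variance=(0.1, 0.1, 0.2, 0.2)):
    ax, ay, aw, ah = tf.unstack(anchors_cxcywh, axis=-1)
    bx, by, bw, bh = tf.unstack(boxes_cxcywh, axis=-1)
    deltas = tf.stack(
        [(bx - ax) / aw, (by - ay) / ah, tf.math.log(bw / aw), tf.math.log(bh / ah)],
        axis=-1,
    )
    return deltas / tf.constant(variance, dtype=deltas.dtype)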
diff --git a/keras_cv/layers/object_detection/rpn_label_encoder.py b/keras_cv/layers/object_detection/rpn_label_encoder.py index 904ba14efb..66f016a103 100644 --- a/keras_cv/layers/object_detection/rpn_label_encoder.py +++ b/keras_cv/layers/object_detection/rpn_label_encoder.py @@ -113,7 +113,9 @@ def call( gt_boxes, source=self.ground_truth_box_format, target="yxyx" ) # [num_anchors, num_gt] or [batch_size, num_anchors, num_gt] - similarity_mat = iou.compute_iou(anchors, gt_boxes, bounding_box_format="yxyx") + similarity_mat = iou.compute_iou( + anchors, gt_boxes, bounding_box_format="yxyx" + ) # [num_anchors] or [batch_size, num_anchors] matched_gt_indices, matched_vals = self.box_matcher(similarity_mat) # [num_anchors] or [batch_size, num_anchors] @@ -125,7 +127,9 @@ def call( negative_matches = tf.math.equal(matched_vals, -1) # [num_anchors, 4] or [batch_size, num_anchors, 4] - matched_gt_boxes = target_gather._target_gather(gt_boxes, matched_gt_indices) + matched_gt_boxes = target_gather._target_gather( + gt_boxes, matched_gt_indices + ) # [num_anchors, 4] or [batch_size, num_anchors, 4], used as `y_true` for regression loss encoded_box_targets = bounding_box._encode_box_to_deltas( anchors, @@ -135,7 +139,9 @@ def call( variance=self.box_variance, ) # [num_anchors, 1] or [batch_size, num_anchors, 1] - box_sample_weights = tf.cast(positive_matches[..., tf.newaxis], gt_boxes.dtype) + box_sample_weights = tf.cast( + positive_matches[..., tf.newaxis], gt_boxes.dtype + ) # [num_anchors, 1] or [batch_size, num_anchors, 1] positive_mask = tf.expand_dims(positive_matches, axis=-1) @@ -144,7 +150,9 @@ def call( positive_classes = tf.ones_like(positive_mask, dtype=gt_classes.dtype) negative_classes = tf.zeros_like(positive_mask, dtype=gt_classes.dtype) # [num_anchors, 1] or [batch_size, num_anchors, 1] - class_targets = tf.where(positive_mask, positive_classes, negative_classes) + class_targets = tf.where( + positive_mask, positive_classes, negative_classes + ) # [num_anchors] or [batch_size, num_anchors] sampled_indicators = sampling.balanced_sample( positive_matches, @@ -157,8 +165,12 @@ def call( sampled_indicators[..., tf.newaxis], gt_classes.dtype ) if pack: - encoded_box_targets = self.unpack_targets(encoded_box_targets, anchors_dict) - box_sample_weights = self.unpack_targets(box_sample_weights, anchors_dict) + encoded_box_targets = self.unpack_targets( + encoded_box_targets, anchors_dict + ) + box_sample_weights = self.unpack_targets( + box_sample_weights, anchors_dict + ) class_targets = self.unpack_targets(class_targets, anchors_dict) class_sample_weights = self.unpack_targets( class_sample_weights, anchors_dict @@ -181,7 +193,9 @@ def unpack_targets(self, targets, anchors_dict): for level, anchors in anchors_dict.items(): num_anchors_lvl = anchors.get_shape().as_list()[0] if target_shape == 2: - unpacked_targets[level] = targets[count : count + num_anchors_lvl, ...] + unpacked_targets[level] = targets[ + count : count + num_anchors_lvl, ... + ] else: unpacked_targets[level] = targets[ :, count : count + num_anchors_lvl, ... 
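The unpack_targets change above only re-wraps a long slice, but the underlying idea is worth spelling out: targets are computed over all anchors concatenated across feature-pyramid levels, then sliced back into a per-level dict using each level's anchor count. A hedged sketch with made-up level names and counts (not the actual encoder code):

import tensorflow as tf

anchors_per_level = {"P3": 6, "P4": 3, "P5": 1}  # assumed counts, 10 anchors total
flat_targets = tf.random.uniform([10, 4])        # unbatched [num_anchors, 4] case

unpacked, count = {}, 0
for level, num_anchors in anchors_per_level.items():
    unpacked[level] = flat_targets[count : count + num_anchors, ...]
    count += num_anchors

print({k: v.shape.as_list() for k, v in unpacked.items()})
# {'P3': [6, 4], 'P4': [3, 4], 'P5': [1, 4]}
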
diff --git a/keras_cv/layers/object_detection/rpn_label_encoder_test.py b/keras_cv/layers/object_detection/rpn_label_encoder_test.py index dade0b50c0..2924d3f8a4 100644 --- a/keras_cv/layers/object_detection/rpn_label_encoder_test.py +++ b/keras_cv/layers/object_detection/rpn_label_encoder_test.py @@ -28,7 +28,12 @@ def test_rpn_label_encoder(self): samples_per_image=2, ) rois = tf.constant( - [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10], [7.5, 7.5, 12.5, 12.5]] + [ + [0, 0, 5, 5], + [2.5, 2.5, 7.5, 7.5], + [5, 5, 10, 10], + [7.5, 7.5, 12.5, 12.5], + ] ) # the 3rd box will generate 0 IOUs and not sampled. gt_boxes = tf.constant([[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5]]) @@ -95,7 +100,12 @@ def test_rpn_label_encoder_batched(self): samples_per_image=2, ) rois = tf.constant( - [[0, 0, 5, 5], [2.5, 2.5, 7.5, 7.5], [5, 5, 10, 10], [7.5, 7.5, 12.5, 12.5]] + [ + [0, 0, 5, 5], + [2.5, 2.5, 7.5, 7.5], + [5, 5, 10, 10], + [7.5, 7.5, 12.5, 12.5], + ] ) # the 3rd box will generate 0 IOUs and not sampled. gt_boxes = tf.constant([[10, 10, 15, 15], [2.5, 2.5, 7.5, 7.5]]) diff --git a/keras_cv/layers/object_detection/sampling.py b/keras_cv/layers/object_detection/sampling.py index 861d540dce..f6036c8d69 100644 --- a/keras_cv/layers/object_detection/sampling.py +++ b/keras_cv/layers/object_detection/sampling.py @@ -68,7 +68,9 @@ def balanced_sample( num_neg_samples = num_samples - num_pos_samples _, negative_indices = tf.math.top_k(values, k=num_neg_samples) selected_indices = tf.concat([positive_indices, negative_indices], axis=-1) - selected_indicators = tf.reduce_sum(tf.one_hot(selected_indices, depth=N), axis=-2) + selected_indicators = tf.reduce_sum( + tf.one_hot(selected_indices, depth=N), axis=-2 + ) selected_indicators = tf.minimum( selected_indicators, tf.ones_like(selected_indicators) ) diff --git a/keras_cv/layers/object_detection/sampling_test.py b/keras_cv/layers/object_detection/sampling_test.py index 7f22ead5ba..4270ff8bbf 100644 --- a/keras_cv/layers/object_detection/sampling_test.py +++ b/keras_cv/layers/object_detection/sampling_test.py @@ -20,7 +20,18 @@ class BalancedSamplingTest(tf.test.TestCase): def test_balanced_sampling(self): positive_matches = tf.constant( - [True, False, False, False, False, False, False, False, False, False] + [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] ) negative_matches = tf.constant( [False, True, True, True, True, True, True, True, True, True] @@ -36,8 +47,30 @@ def test_balanced_sampling(self): def test_balanced_batched_sampling(self): positive_matches = tf.constant( [ - [True, False, False, False, False, False, False, False, False, False], - [False, False, False, False, False, False, True, False, False, False], + [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ], + [ + False, + False, + False, + False, + False, + False, + True, + False, + False, + False, + ], ] ) negative_matches = tf.constant( @@ -58,7 +91,18 @@ def test_balanced_batched_sampling(self): def test_balanced_sampling_over_positive_fraction(self): positive_matches = tf.constant( - [True, False, False, False, False, False, False, False, False, False] + [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] ) negative_matches = tf.constant( [False, True, True, True, True, True, True, True, True, True] @@ -73,7 +117,18 @@ def test_balanced_sampling_over_positive_fraction(self): def test_balanced_sampling_under_positive_fraction(self): positive_matches = 
tf.constant( - [True, False, False, False, False, False, False, False, False, False] + [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] ) negative_matches = tf.constant( [False, True, True, True, True, True, True, True, True, True] @@ -89,7 +144,18 @@ def test_balanced_sampling_under_positive_fraction(self): def test_balanced_sampling_over_num_samples(self): positive_matches = tf.constant( - [True, False, False, False, False, False, False, False, False, False] + [ + True, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] ) negative_matches = tf.constant( [False, True, True, True, True, True, True, True, True, True] @@ -99,12 +165,26 @@ def test_balanced_sampling_over_num_samples(self): positive_fraction = 0.1 with self.assertRaisesRegex(ValueError, "has less element"): _ = balanced_sample( - positive_matches, negative_matches, num_samples, positive_fraction + positive_matches, + negative_matches, + num_samples, + positive_fraction, ) def test_balanced_sampling_no_positive(self): positive_matches = tf.constant( - [False, False, False, False, False, False, False, False, False, False] + [ + False, + False, + False, + False, + False, + False, + False, + False, + False, + False, + ] ) # the rest are neither positive nor negative, but ignord matches negative_matches = tf.constant( diff --git a/keras_cv/layers/object_detection_3d/center_net_label_encoder.py b/keras_cv/layers/object_detection_3d/center_net_label_encoder.py index caccf001a0..9e1e93271c 100644 --- a/keras_cv/layers/object_detection_3d/center_net_label_encoder.py +++ b/keras_cv/layers/object_detection_3d/center_net_label_encoder.py @@ -90,10 +90,14 @@ def compute_heatmap( # [B, N, max_num_voxels_per_box, 3] point_xyz = ( box_center[:, :, tf.newaxis, :] - + tf.constant(points_numpy, dtype=tf.float32)[tf.newaxis, tf.newaxis, :, :] + + tf.constant(points_numpy, dtype=tf.float32)[ + tf.newaxis, tf.newaxis, :, : + ] ) # [B, N, max_num_voxels_per_box, 3] - point_xyz = voxel_utils.point_to_voxel_coord(point_xyz, voxel_size, dtype=tf.int32) + point_xyz = voxel_utils.point_to_voxel_coord( + point_xyz, voxel_size, dtype=tf.int32 + ) # Map voxel back to xyz to get quantized version. # [B, N, max_num_voxels_per_box, 3] point_xyz = voxel_utils.voxel_coord_to_point( @@ -109,13 +113,17 @@ def compute_heatmap( # convert from box frame to vehicle frame. # [B, N, max_num_voxels_per_box, 3] point_xyz_transform = ( - point_xyz_rot + voxel_utils.inv_loc(rot, box_center)[:, :, tf.newaxis, :] + point_xyz_rot + + voxel_utils.inv_loc(rot, box_center)[:, :, tf.newaxis, :] ) # Due to the transform above, z=0 can be transformed to a non-zero value. For # 2d headmap, we do not want to use z. 
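As an aside on the inv_loc helper used a few lines above (its reformatted definition appears further down in voxel_utils.py): for a rigid pose [R | t] the inverse transform is [R^T | -R^T t], so inv_loc(rot, loc) returns the translation -R^T @ loc, and applying R^T plus that translation re-expresses a point in the other frame. A small numeric check, assuming nothing beyond standard TensorFlow:

import tensorflow as tf

rot = tf.constant([[0.0, -1.0, 0.0],
                   [1.0,  0.0, 0.0],
                   [0.0,  0.0, 1.0]])  # 90-degree yaw
loc = tf.constant([2.0, 1.0, 0.0])

# Translation part of the inverted pose: -R^T @ loc, as in voxel_utils.inv_loc.
inv_translation = -tf.squeeze(
    tf.linalg.matmul(rot, loc[:, tf.newaxis], transpose_a=True), axis=-1
)
point = tf.constant([3.0, 1.0, 0.0])
local = tf.linalg.matvec(rot, point, transpose_a=True) + inv_translation
print(local.numpy())  # [ 0. -1.  0.] == R^T @ (point - loc)
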
if voxel_size[2] > INF_VOXEL_SIZE: point_xyz_transform = tf.concat( - [point_xyz_transform[..., :2], tf.zeros_like(point_xyz_transform[..., :1])], + [ + point_xyz_transform[..., :2], + tf.zeros_like(point_xyz_transform[..., :1]), + ], axis=-1, ) @@ -151,11 +159,16 @@ def compute_heatmap( ) = voxel_utils.combined_static_and_dynamic_shape(point_xyz) box_id = tf.range(num_box, dtype=tf.int32) box_id = tf.tile( - box_id[tf.newaxis, :, tf.newaxis], [batch_size, 1, max_num_voxels_per_box] + box_id[tf.newaxis, :, tf.newaxis], + [batch_size, 1, max_num_voxels_per_box], ) - point_xyz = tf.reshape(point_xyz, [batch_size, num_box * max_num_voxels_per_box, 3]) - heatmap = tf.reshape(heatmap, [batch_size, num_box * max_num_voxels_per_box]) + point_xyz = tf.reshape( + point_xyz, [batch_size, num_box * max_num_voxels_per_box, 3] + ) + heatmap = tf.reshape( + heatmap, [batch_size, num_box * max_num_voxels_per_box] + ) box_id = tf.reshape(box_id, [batch_size, num_box * max_num_voxels_per_box]) mask = tf.reshape(mask, [batch_size, num_box * max_num_voxels_per_box]) @@ -205,11 +218,15 @@ def scatter_to_dense_heatmap( ) # [B, N] point_voxel_valid_mask = tf.math.reduce_all( - tf.math.logical_and(point_voxel_xyz >= 0, point_voxel_xyz < voxel_spatial_size), + tf.math.logical_and( + point_voxel_xyz >= 0, point_voxel_xyz < voxel_spatial_size + ), axis=-1, ) # [B, N] - point_voxel_valid_mask = tf.math.logical_and(point_voxel_valid_mask, point_mask) + point_voxel_valid_mask = tf.math.logical_and( + point_voxel_valid_mask, point_mask + ) # [B, N] point_voxel_xyz = point_voxel_xyz * tf.cast( point_voxel_valid_mask[..., tf.newaxis], dtype=point_voxel_xyz.dtype @@ -250,7 +267,9 @@ def fn(args): return dense_heatmap, dense_box_id -def decode_tensor(t: tf.Tensor, dims: Sequence[Union[tf.Tensor, int]]) -> tf.Tensor: +def decode_tensor( + t: tf.Tensor, dims: Sequence[Union[tf.Tensor, int]] +) -> tf.Tensor: """ Args: @@ -354,12 +373,21 @@ def call(self, box_3d, box_classes, box_mask): # heatmap - [B, num_boxes * max_num_voxels_per_box] # compute localized heatmap around its radius. point_xyz, point_mask, heatmap, box_id = compute_heatmap( - box_3d, box_mask, self._voxel_size, self._min_radius, self._max_radius + box_3d, + box_mask, + self._voxel_size, + self._min_radius, + self._max_radius, ) # heatmap - [B, H, W, Z] # scatter the localized heatmap to global heatmap in vehicle frame. dense_heatmap, dense_box_id = scatter_to_dense_heatmap( - point_xyz, point_mask, box_id, heatmap, self._voxel_size, self._spatial_size + point_xyz, + point_mask, + box_id, + heatmap, + self._voxel_size, + self._spatial_size, ) b, h, w, z = voxel_utils.combined_static_and_dynamic_shape(dense_box_id) # [B, H * W * Z] @@ -383,7 +411,9 @@ def call(self, box_3d, box_classes, box_mask): # [B, H, W, Z, 3] dense_box_3d_center = dense_box_3d[..., :3] - feature_map_ref_xyz # [B, H, W, Z, 7] - dense_box_3d = tf.concat([dense_box_3d_center, dense_box_3d[..., 3:]], axis=-1) + dense_box_3d = tf.concat( + [dense_box_3d_center, dense_box_3d[..., 3:]], axis=-1 + ) heatmap_dict = {} box_3d_dict = {} @@ -392,7 +422,8 @@ def call(self, box_3d, box_classes, box_mask): class_key = f"class_{i+1}" # Object class is 1-indexed (0 is background). 
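The loop being re-wrapped here builds one heatmap per object class: voxels whose box class equals i + 1 keep their heatmap value, everything else is zeroed. A compact, self-contained version of that masking (toy shapes, not the actual encoder):

import tensorflow as tf

num_classes = 2
dense_heatmap = tf.random.uniform([1, 4, 4, 1])  # [B, H, W, Z]
dense_box_classes = tf.random.uniform([1, 4, 4, 1], 0, num_classes + 1, tf.int32)

heatmap_dict = {}
for i in range(num_classes):
    class_mask = tf.cast(tf.math.equal(dense_box_classes, i + 1), dense_heatmap.dtype)
    # Background voxels (class 0) stay zero in every per-class heatmap.
    heatmap_dict[f"class_{i + 1}"] = dense_heatmap * class_mask
print({k: v.shape.as_list() for k, v in heatmap_dict.items()})
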
dense_box_class_i = tf.cast( - tf.math.equal(dense_box_classes, i + 1), dtype=dense_heatmap.dtype + tf.math.equal(dense_box_classes, i + 1), + dtype=dense_heatmap.dtype, ) # [B, H, W, Z] dense_heatmap_i = dense_heatmap * dense_box_class_i @@ -412,6 +443,8 @@ def call(self, box_3d, box_classes, box_mask): top_k_heatmap_feature_idx_i = compute_top_k_heatmap_idx( dense_heatmap_i, self._top_k_heatmap[i] ) - top_k_heatmap_feature_idx_dict[class_key] = top_k_heatmap_feature_idx_i + top_k_heatmap_feature_idx_dict[ + class_key + ] = top_k_heatmap_feature_idx_i return heatmap_dict, box_3d_dict, top_k_heatmap_feature_idx_dict diff --git a/keras_cv/layers/object_detection_3d/heatmap_decoder.py b/keras_cv/layers/object_detection_3d/heatmap_decoder.py index 331d26ccef..5f3a354d06 100644 --- a/keras_cv/layers/object_detection_3d/heatmap_decoder.py +++ b/keras_cv/layers/object_detection_3d/heatmap_decoder.py @@ -133,7 +133,9 @@ def __init__( self.spatial_size = spatial_size self.built = True - def call(self, prediction: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: + def call( + self, prediction: tf.Tensor + ) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: """Accepts raw predictions, and returns decoded boxes. Args: @@ -144,7 +146,9 @@ def call(self, prediction: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: heatmap_mask = heatmap > self.heatmap_threshold heatmap_local_maxima_mask = tf.math.equal(heatmap, heatmap_pool) # [B, H, W, 1] - heatmap_mask = tf.math.logical_and(heatmap_mask, heatmap_local_maxima_mask) + heatmap_mask = tf.math.logical_and( + heatmap_mask, heatmap_local_maxima_mask + ) # [B, H, W, 1] heatmap = tf.where(heatmap_mask, heatmap, 0) # [B, H, W] @@ -166,7 +170,9 @@ def call(self, prediction: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: box_class = tf.ones_like(box_score, dtype=tf.int32) * self.class_id # [B*max_num_box, ?] f = box_prediction.get_shape().as_list()[-1] - box_prediction_reshape = tf.reshape(box_prediction, [b * self.max_num_box, f]) + box_prediction_reshape = tf.reshape( + box_prediction, [b * self.max_num_box, f] + ) # [B*max_num_box, 7] box_decoded = decode_bin_box( box_prediction_reshape, self.num_head_bin, self.anchor_size @@ -185,7 +191,9 @@ def call(self, prediction: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]: ref_xyz = tf.gather(ref_xyz, top_index, batch_dims=1) box_decoded_cxyz = ref_xyz + box_decoded[:, :, :3] - box_decoded = tf.concat([box_decoded_cxyz, box_decoded[:, :, 3:]], axis=-1) + box_decoded = tf.concat( + [box_decoded_cxyz, box_decoded[:, :, 3:]], axis=-1 + ) return box_decoded, box_class, box_score def get_config(self): diff --git a/keras_cv/layers/object_detection_3d/voxel_utils.py b/keras_cv/layers/object_detection_3d/voxel_utils.py index 645c67bb71..3bbfe0832d 100644 --- a/keras_cv/layers/object_detection_3d/voxel_utils.py +++ b/keras_cv/layers/object_detection_3d/voxel_utils.py @@ -41,7 +41,9 @@ def compute_feature_map_ref_xyz( """ voxel_spatial_size = compute_voxel_spatial_size(spatial_size, voxel_size) voxel_coord_meshgrid = np.mgrid[ - 0 : voxel_spatial_size[0], 0 : voxel_spatial_size[1], 0 : voxel_spatial_size[2] + 0 : voxel_spatial_size[0], + 0 : voxel_spatial_size[1], + 0 : voxel_spatial_size[2], ] voxel_coord = np.concatenate(voxel_coord_meshgrid[..., np.newaxis], axis=-1) # [H, W, Z, 3] @@ -56,7 +58,10 @@ def compute_feature_map_ref_xyz( # [H, W, Z, 3] ref = voxel_coord_to_point(voxel_coord, voxel_size, dtype=global_xyz.dtype) # [1, H, W, Z, 3] + [B, 1, 1, 1, 3] -> [B, H, W, Z, 3] - ref = ref[tf.newaxis, ...] 
+ global_xyz[:, tf.newaxis, tf.newaxis, tf.newaxis, :] + ref = ( + ref[tf.newaxis, ...] + + global_xyz[:, tf.newaxis, tf.newaxis, tf.newaxis, :] + ) return ref @@ -81,7 +86,9 @@ def compute_voxel_spatial_size( voxel_spatial_size_float = [ i / j for i, j in zip(voxel_spatial_size_float, voxel_size) ] - voxel_spatial_size_int = [math.ceil(v - EPSILON) for v in voxel_spatial_size_float] + voxel_spatial_size_int = [ + math.ceil(v - EPSILON) for v in voxel_spatial_size_float + ] return voxel_spatial_size_int @@ -125,7 +132,9 @@ def point_to_voxel_coord( voxelized coordinates. """ with tf.name_scope("point_to_voxel_coord"): - point_voxelized = point_xyz / tf.constant(voxel_size, dtype=point_xyz.dtype) + point_voxelized = point_xyz / tf.constant( + voxel_size, dtype=point_xyz.dtype + ) assert dtype.is_integer or dtype.is_floating, f"{dtype}" # Note: tf.round casts float to the nearest integer. If the float is 0.5, it # casts it to the nearest even integer. @@ -198,7 +207,9 @@ def inv_loc(rot: tf.Tensor, loc: tf.Tensor) -> tf.Tensor: Returns: [..., 3] new location matrix. """ - new_loc = -1.0 * tf.linalg.matmul(rot, loc[..., tf.newaxis], transpose_a=True) + new_loc = -1.0 * tf.linalg.matmul( + rot, loc[..., tf.newaxis], transpose_a=True + ) return tf.squeeze(new_loc, axis=-1) @@ -214,7 +225,9 @@ def shape_int_compatible(t: tf.Tensor) -> tf.TensorShape: return tf.shape(t) -def combined_static_and_dynamic_shape(tensor: tf.Tensor) -> List[Union[tf.Tensor, int]]: +def combined_static_and_dynamic_shape( + tensor: tf.Tensor, +) -> List[Union[tf.Tensor, int]]: """Returns a list containing static and dynamic values for the dimensions. Returns a list of static and dynamic values for shape dimensions. This is @@ -272,7 +285,9 @@ def _pad_or_trim_to(x, shape, pad_val=0, pad_after_contents=True): expected_rank = len(shape) elif isinstance(shape, tf.TensorShape): if not shape.is_fully_defined(): - raise ValueError("shape %s padding %s must be fully defined." % (shape, x)) + raise ValueError( + "shape %s padding %s must be fully defined." 
% (shape, x) + ) expected_rank = shape.rank else: shape = _has_rank(shape, 1) diff --git a/keras_cv/layers/object_detection_3d/voxel_utils_test.py b/keras_cv/layers/object_detection_3d/voxel_utils_test.py index 6800de4ede..c7cf4b0329 100644 --- a/keras_cv/layers/object_detection_3d/voxel_utils_test.py +++ b/keras_cv/layers/object_detection_3d/voxel_utils_test.py @@ -29,7 +29,9 @@ def test_2D_constant_shape_pad(self): ) self.assertEqual(padded_x_right.shape.as_list(), [4, 6]) self.assertEqual(padded_x_left.shape.as_list(), [4, 6]) - real_x_right, real_x_left = self.evaluate([padded_x_right, padded_x_left]) + real_x_right, real_x_left = self.evaluate( + [padded_x_right, padded_x_left] + ) expected_x_right = [ [0.38615, 2.975221, -0.852826, 0.0, 0.0, 0.0], [-0.571142, -0.432439, 0.413158, 0.0, 0.0, 0.0], @@ -54,7 +56,9 @@ def test_2D_constant_shape_trim(self): ) self.assertEqual(trimmed_x_right.shape.as_list(), [1, 3]) self.assertEqual(trimmed_x_left.shape.as_list(), [1, 3]) - real_x_right, real_x_left = self.evaluate([trimmed_x_right, trimmed_x_left]) + real_x_right, real_x_left = self.evaluate( + [trimmed_x_right, trimmed_x_left] + ) expected_x_right = [[0.38615, 2.975221, -0.852826]] self.assertAllClose(expected_x_right, real_x_right) expected_x_left = [[0.255314, -0.985647, 1.461641]] diff --git a/keras_cv/layers/object_detection_3d/voxelization.py b/keras_cv/layers/object_detection_3d/voxelization.py index c6e329de93..86eb4f3334 100644 --- a/keras_cv/layers/object_detection_3d/voxelization.py +++ b/keras_cv/layers/object_detection_3d/voxelization.py @@ -54,7 +54,9 @@ def compute_point_voxel_id( if batch_size == 1: return point_voxel_id - batch_multiplier = tf.range(batch_size, dtype=tf.int32) * voxel_spatial_size_prod[0] + batch_multiplier = ( + tf.range(batch_size, dtype=tf.int32) * voxel_spatial_size_prod[0] + ) batch_multiplier = batch_multiplier[:, tf.newaxis] return point_voxel_id + batch_multiplier @@ -131,14 +133,18 @@ def call( # [B, N, dim] # convert point voxel to positive voxel index - point_voxel_xyz = point_voxel_xyz_int - voxel_origin[tf.newaxis, tf.newaxis, :] + point_voxel_xyz = ( + point_voxel_xyz_int - voxel_origin[tf.newaxis, tf.newaxis, :] + ) # [B, N] # remove points outside of the voxel boundary point_voxel_mask = tf.logical_and( point_voxel_xyz >= 0, point_voxel_xyz - < tf.constant(self._voxel_spatial_size, dtype=point_voxel_xyz.dtype), + < tf.constant( + self._voxel_spatial_size, dtype=point_voxel_xyz.dtype + ), ) point_voxel_mask = tf.math.reduce_all(point_voxel_mask, axis=-1) point_voxel_mask = tf.logical_and(point_voxel_mask, point_mask) @@ -189,7 +195,9 @@ def __init__( self._voxel_spatial_size = voxel_utils.compute_voxel_spatial_size( spatial_size, self._voxel_size ) - self._voxel_spatial_size_volume = np.prod(self._voxel_spatial_size).item() + self._voxel_spatial_size_volume = np.prod( + self._voxel_spatial_size + ).item() def call( self, @@ -221,7 +229,9 @@ def call( ) = self._voxelization_layer(point_xyz=point_xyz, point_mask=point_mask) # TODO(tanzhenyu): move compute_point_voxel_id to here, so PointToVoxel layer is more generic. 
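The voxelization.py hunk that continues below revolves around one core operation: every point carries a flat voxel id, point features are max-pooled into voxels with tf.math.unsorted_segment_max, and empty voxels (which come back at the dtype's lowest value) are then masked out. A tiny illustration under those assumptions:

import tensorflow as tf

num_voxels = 4
point_voxel_id = tf.constant([0, 0, 2, 2, 3])                     # voxel index per point
point_feature = tf.constant([[1.0], [5.0], [2.0], [7.0], [4.0]])  # [num_points, dim]

voxel_feature = tf.math.unsorted_segment_max(point_feature, point_voxel_id, num_voxels)
valid = voxel_feature > tf.float32.min / 2.0  # empty voxels sit at float32 lowest
voxel_feature = tf.where(valid, voxel_feature, tf.zeros_like(voxel_feature))
print(voxel_feature.numpy())  # [[5.] [0.] [7.] [4.]] -- voxel 1 had no points
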
point_feature = tf.concat([point_feature, point_voxel_feature], axis=-1) - batch_size = point_feature.shape.as_list()[0] or tf.shape(point_feature)[0] + batch_size = ( + point_feature.shape.as_list()[0] or tf.shape(point_feature)[0] + ) # [B, N, 1] point_mask_float = tf.cast(point_voxel_mask, point_feature.dtype)[ ..., tf.newaxis @@ -238,7 +248,9 @@ def call( point_voxel_id = tf.reshape(point_voxel_id, [-1]) # [B * num_voxels, new_dim] voxel_feature = tf.math.unsorted_segment_max( - point_feature, point_voxel_id, batch_size * self._voxel_spatial_size_volume + point_feature, + point_voxel_id, + batch_size * self._voxel_spatial_size_volume, ) # unsorted_segment_max sets empty values to -inf(float). voxel_feature_valid_mask = voxel_feature > VOXEL_FEATURE_MIN diff --git a/keras_cv/layers/object_detection_3d/voxelization_test.py b/keras_cv/layers/object_detection_3d/voxelization_test.py index f32132ceba..e3f8792d47 100644 --- a/keras_cv/layers/object_detection_3d/voxelization_test.py +++ b/keras_cv/layers/object_detection_3d/voxelization_test.py @@ -39,7 +39,9 @@ def test_voxelization_output_shape_no_z(self): shape=[1, 1000, 4], minval=-10, maxval=10, dtype=tf.float32 ) point_mask = tf.cast( - tf.random.uniform(shape=[1, 1000], minval=0, maxval=2, dtype=tf.int32), + tf.random.uniform( + shape=[1, 1000], minval=0, maxval=2, dtype=tf.int32 + ), tf.bool, ) output = layer(point_xyz, point_feature, point_mask) @@ -60,7 +62,9 @@ def test_voxelization_output_shape_with_z(self): shape=[1, 1000, 4], minval=-10, maxval=10, dtype=tf.float32 ) point_mask = tf.cast( - tf.random.uniform(shape=[1, 1000], minval=0, maxval=2, dtype=tf.int32), + tf.random.uniform( + shape=[1, 1000], minval=0, maxval=2, dtype=tf.int32 + ), tf.bool, ) output = layer(point_xyz, point_feature, point_mask) diff --git a/keras_cv/layers/preprocessing/__init__.py b/keras_cv/layers/preprocessing/__init__.py index 2d03b512d4..02ea9e4e77 100644 --- a/keras_cv/layers/preprocessing/__init__.py +++ b/keras_cv/layers/preprocessing/__init__.py @@ -40,7 +40,9 @@ RandomAugmentationPipeline, ) from keras_cv.layers.preprocessing.random_brightness import RandomBrightness -from keras_cv.layers.preprocessing.random_channel_shift import RandomChannelShift +from keras_cv.layers.preprocessing.random_channel_shift import ( + RandomChannelShift, +) from keras_cv.layers.preprocessing.random_choice import RandomChoice from keras_cv.layers.preprocessing.random_color_degeneration import ( RandomColorDegeneration, @@ -48,10 +50,14 @@ from keras_cv.layers.preprocessing.random_color_jitter import RandomColorJitter from keras_cv.layers.preprocessing.random_contrast import RandomContrast from keras_cv.layers.preprocessing.random_crop import RandomCrop -from keras_cv.layers.preprocessing.random_crop_and_resize import RandomCropAndResize +from keras_cv.layers.preprocessing.random_crop_and_resize import ( + RandomCropAndResize, +) from keras_cv.layers.preprocessing.random_cutout import RandomCutout from keras_cv.layers.preprocessing.random_flip import RandomFlip -from keras_cv.layers.preprocessing.random_gaussian_blur import RandomGaussianBlur +from keras_cv.layers.preprocessing.random_gaussian_blur import ( + RandomGaussianBlur, +) from keras_cv.layers.preprocessing.random_hue import RandomHue from keras_cv.layers.preprocessing.random_jpeg_quality import RandomJpegQuality from keras_cv.layers.preprocessing.random_rotation import RandomRotation @@ -60,8 +66,12 @@ from keras_cv.layers.preprocessing.random_shear import RandomShear from 
keras_cv.layers.preprocessing.random_translation import RandomTranslation from keras_cv.layers.preprocessing.random_zoom import RandomZoom -from keras_cv.layers.preprocessing.randomly_zoomed_crop import RandomlyZoomedCrop -from keras_cv.layers.preprocessing.repeated_augmentation import RepeatedAugmentation +from keras_cv.layers.preprocessing.randomly_zoomed_crop import ( + RandomlyZoomedCrop, +) +from keras_cv.layers.preprocessing.repeated_augmentation import ( + RepeatedAugmentation, +) from keras_cv.layers.preprocessing.rescaling import Rescaling from keras_cv.layers.preprocessing.resizing import Resizing from keras_cv.layers.preprocessing.solarization import Solarization diff --git a/keras_cv/layers/preprocessing/aug_mix.py b/keras_cv/layers/preprocessing/aug_mix.py index f3a15a6815..19660a3951 100644 --- a/keras_cv/layers/preprocessing/aug_mix.py +++ b/keras_cv/layers/preprocessing/aug_mix.py @@ -100,9 +100,13 @@ def __init__( def _sample_from_dirichlet(self, alpha): gamma_sample = tf.random.gamma( - shape=(), alpha=alpha, seed=self._random_generator.make_legacy_seed() + shape=(), + alpha=alpha, + seed=self._random_generator.make_legacy_seed(), + ) + return gamma_sample / tf.reduce_sum( + gamma_sample, axis=-1, keepdims=True ) - return gamma_sample / tf.reduce_sum(gamma_sample, axis=-1, keepdims=True) def _sample_from_beta(self, alpha, beta): sample_alpha = tf.random.gamma( @@ -159,7 +163,9 @@ def _posterize(self, image): bits = tf.cast(self.severity_factor() * 3, tf.int32) shift = tf.cast(4 - bits + 1, tf.uint8) image = tf.cast(image, tf.uint8) - image = tf.bitwise.left_shift(tf.bitwise.right_shift(image, shift), shift) + image = tf.bitwise.left_shift( + tf.bitwise.right_shift(image, shift), shift + ) image = tf.cast(image, self.compute_dtype) return preprocessing.transform_value_range( images=image, @@ -168,7 +174,9 @@ def _posterize(self, image): ) def _rotate(self, image): - angle = tf.expand_dims(tf.cast(self.severity_factor() * 30, tf.float32), axis=0) + angle = tf.expand_dims( + tf.cast(self.severity_factor() * 30, tf.float32), axis=0 + ) shape = tf.cast(tf.shape(image), tf.float32) return preprocessing.transform( @@ -177,7 +185,9 @@ def _rotate(self, image): )[0] def _solarize(self, image): - threshold = tf.cast(tf.cast(self.severity_factor() * 255, tf.int32), tf.float32) + threshold = tf.cast( + tf.cast(self.severity_factor() * 255, tf.int32), tf.float32 + ) image = preprocessing.transform_value_range( image, original_range=self.value_range, target_range=(0, 255) @@ -219,7 +229,8 @@ def _translate_x(self, image): tf.concat([x, tf.zeros_like(x)], axis=1), dtype=tf.float32 ) return preprocessing.transform( - tf.expand_dims(image, 0), preprocessing.get_translation_matrix(translations) + tf.expand_dims(image, 0), + preprocessing.get_translation_matrix(translations), )[0] def _translate_y(self, image): @@ -233,7 +244,8 @@ def _translate_y(self, image): tf.concat([tf.zeros_like(y), y], axis=1), dtype=tf.float32 ) return preprocessing.transform( - tf.expand_dims(image, 0), preprocessing.get_translation_matrix(translations) + tf.expand_dims(image, 0), + preprocessing.get_translation_matrix(translations), )[0] def _apply_op(self, image, op_index): diff --git a/keras_cv/layers/preprocessing/augmenter.py b/keras_cv/layers/preprocessing/augmenter.py index e6448f6fb4..ae61467299 100644 --- a/keras_cv/layers/preprocessing/augmenter.py +++ b/keras_cv/layers/preprocessing/augmenter.py @@ -39,5 +39,7 @@ def get_config(self): @classmethod def from_config(cls, config): if config["layers"] and 
isinstance(config["layers"][0], dict): - config["layers"] = tf.keras.utils.deserialize_keras_object(config["layers"]) + config["layers"] = tf.keras.utils.deserialize_keras_object( + config["layers"] + ) return cls(**config) diff --git a/keras_cv/layers/preprocessing/auto_contrast.py b/keras_cv/layers/preprocessing/auto_contrast.py index 9d36d67742..34e785cb5d 100644 --- a/keras_cv/layers/preprocessing/auto_contrast.py +++ b/keras_cv/layers/preprocessing/auto_contrast.py @@ -76,7 +76,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_labels(self, labels, transformations=None, **kwargs): return labels - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): return segmentation_masks def augment_keypoints(self, keypoints, transformations, **kwargs): @@ -86,7 +88,9 @@ def augment_targets(self, targets, transformations, **kwargs): return targets def augment_ragged_image(self, image, transformation, **kwargs): - return self.augment_images(image, transformations=transformation, **kwargs) + return self.augment_images( + image, transformations=transformation, **kwargs + ) def get_config(self): config = super().get_config() diff --git a/keras_cv/layers/preprocessing/auto_contrast_test.py b/keras_cv/layers/preprocessing/auto_contrast_test.py index 18591e1eab..c9da5958ce 100644 --- a/keras_cv/layers/preprocessing/auto_contrast_test.py +++ b/keras_cv/layers/preprocessing/auto_contrast_test.py @@ -44,7 +44,8 @@ def test_auto_contrast_expands_value_range(self): def test_auto_contrast_different_values_per_channel(self): img = tf.constant( - [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=tf.float32 + [[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], + dtype=tf.float32, ) img = tf.expand_dims(img, axis=0) diff --git a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py index fe6656250c..574b48152e 100644 --- a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py +++ b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py @@ -165,7 +165,9 @@ def compute_image_signature(self, images): """ if self.force_output_dense_images: return tf.TensorSpec(images.shape[1:], self.compute_dtype) - if self.force_output_ragged_images or isinstance(images, tf.RaggedTensor): + if self.force_output_ragged_images or isinstance( + images, tf.RaggedTensor + ): ragged_spec = tf.RaggedTensorSpec( shape=images.shape[1:], ragged_rank=1, @@ -182,7 +184,9 @@ def _compute_bounding_box_signature(self, bounding_boxes): ragged_rank=1, dtype=self.compute_dtype, ), - "classes": tf.RaggedTensorSpec(shape=[None], dtype=self.compute_dtype), + "classes": tf.RaggedTensorSpec( + shape=[None], dtype=self.compute_dtype + ), } # TODO(lukewood): promote to user facing API if needed @@ -205,19 +209,21 @@ def _compute_target_signature(self, targets): return tf.TensorSpec(targets.shape[1:], self.compute_dtype) def _compute_output_signature(self, inputs): - fn_output_signature = {IMAGES: self.compute_image_signature(inputs[IMAGES])} + fn_output_signature = { + IMAGES: self.compute_image_signature(inputs[IMAGES]) + } bounding_boxes = inputs.get(BOUNDING_BOXES, None) if bounding_boxes is not None: - fn_output_signature[BOUNDING_BOXES] = self._compute_bounding_box_signature( - bounding_boxes - ) + fn_output_signature[ + BOUNDING_BOXES + ] = self._compute_bounding_box_signature(bounding_boxes) 
segmentation_masks = inputs.get(SEGMENTATION_MASKS, None) if segmentation_masks is not None: - fn_output_signature[SEGMENTATION_MASKS] = self.compute_image_signature( - segmentation_masks - ) + fn_output_signature[ + SEGMENTATION_MASKS + ] = self.compute_image_signature(segmentation_masks) keypoints = inputs.get(KEYPOINTS, None) if keypoints is not None: @@ -249,7 +255,9 @@ def _map_fn(self, func, inputs): """ if self._any_ragged(inputs) or self.force_output_ragged_images: return tf.map_fn( - func, inputs, fn_output_signature=self._compute_output_signature(inputs) + func, + inputs, + fn_output_signature=self._compute_output_signature(inputs), ) if self.auto_vectorize: return tf.vectorized_map(func, inputs) @@ -330,7 +338,9 @@ def augment_keypoints(self, keypoints, transformation, **kwargs): """ raise NotImplementedError() - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): """Augment a single image's segmentation mask during training. Args: @@ -380,7 +390,9 @@ def call(self, inputs, training=True): if images.shape.rank == 3: return self._format_output(self._augment(inputs), metadata) elif images.shape.rank == 4: - return self._format_output(self._batch_augment(inputs), metadata) + return self._format_output( + self._batch_augment(inputs), metadata + ) else: raise ValueError( "Image augmentation layers are expecting inputs to be " @@ -486,7 +498,9 @@ def _format_inputs(self, inputs): ) if BOUNDING_BOXES in inputs: - inputs[BOUNDING_BOXES] = self._format_bounding_boxes(inputs[BOUNDING_BOXES]) + inputs[BOUNDING_BOXES] = self._format_bounding_boxes( + inputs[BOUNDING_BOXES] + ) if isinstance(inputs, dict) and TARGETS in inputs: # TODO(scottzhu): Check if it only contains the valid keys diff --git a/keras_cv/layers/preprocessing/base_image_augmentation_layer_test.py b/keras_cv/layers/preprocessing/base_image_augmentation_layer_test.py index ca7294d8ff..f28f41d191 100644 --- a/keras_cv/layers/preprocessing/base_image_augmentation_layer_test.py +++ b/keras_cv/layers/preprocessing/base_image_augmentation_layer_test.py @@ -48,7 +48,9 @@ def augment_bounding_boxes(self, bounding_boxes, transformation, **kwargs): def augment_keypoints(self, keypoints, transformation, **kwargs): return keypoints + transformation - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask + transformation @@ -78,7 +80,9 @@ def test_augment_casts_dtypes(self): images = tf.ones((2, 8, 8, 3), dtype="uint8") output = add_layer(images) - self.assertAllClose(tf.ones((2, 8, 8, 3), dtype="float32") * 3.0, output) + self.assertAllClose( + tf.ones((2, 8, 8, 3), dtype="float32") * 3.0, output + ) def test_augment_batch_images(self): add_layer = RandomAddLayer() @@ -168,7 +172,9 @@ def test_augment_image_and_localization_data(self): "segmentation_masks": segmentation_mask + 2.0, } - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) self.assertAllClose(output["images"], expected_output["images"]) self.assertAllClose(output["keypoints"], expected_output["keypoints"]) @@ -192,7 +198,9 @@ def test_augment_batch_image_and_localization_data(self): "classes": np.random.random(size=(2, 3)).astype("float32"), } keypoints = np.random.random(size=(2, 3, 5, 2)).astype("float32") - 
segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32") + segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype( + "float32" + ) output = add_layer( { @@ -207,10 +215,14 @@ def test_augment_batch_image_and_localization_data(self): output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] ) keypoints_diff = output["keypoints"] - keypoints - segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + segmentation_mask_diff = ( + output["segmentation_masks"] - segmentation_masks + ) self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1]) self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) - self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) + self.assertNotAllClose( + segmentation_mask_diff[0], segmentation_mask_diff[1] + ) @tf.function def in_tf_function(inputs): @@ -229,10 +241,14 @@ def in_tf_function(inputs): output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] ) keypoints_diff = output["keypoints"] - keypoints - segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + segmentation_mask_diff = ( + output["segmentation_masks"] - segmentation_masks + ) self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1]) self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) - self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) + self.assertNotAllClose( + segmentation_mask_diff[0], segmentation_mask_diff[1] + ) def test_augment_all_data_in_tf_function(self): add_layer = RandomAddLayer() @@ -242,7 +258,9 @@ def test_augment_all_data_in_tf_function(self): "classes": np.random.random(size=(2, 3)).astype("float32"), } keypoints = np.random.random(size=(2, 5, 2)).astype("float32") - segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32") + segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype( + "float32" + ) @tf.function def in_tf_function(inputs): @@ -261,7 +279,11 @@ def in_tf_function(inputs): output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] ) keypoints_diff = output["keypoints"] - keypoints - segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + segmentation_mask_diff = ( + output["segmentation_masks"] - segmentation_masks + ) self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1]) self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) - self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) + self.assertNotAllClose( + segmentation_mask_diff[0], segmentation_mask_diff[1] + ) diff --git a/keras_cv/layers/preprocessing/channel_shuffle.py b/keras_cv/layers/preprocessing/channel_shuffle.py index 230b464098..b7ff9eb80d 100644 --- a/keras_cv/layers/preprocessing/channel_shuffle.py +++ b/keras_cv/layers/preprocessing/channel_shuffle.py @@ -69,7 +69,9 @@ def augment_image(self, image, transformation=None, **kwargs): ) channels_per_group = num_channels // self.groups - image = tf.reshape(image, [height, width, self.groups, channels_per_group]) + image = tf.reshape( + image, [height, width, self.groups, channels_per_group] + ) image = tf.transpose(image, perm=[2, 0, 1, 3]) image = tf.random.shuffle(image, seed=self.seed) image = tf.transpose(image, perm=[1, 2, 3, 0]) @@ -83,7 +85,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, 
segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/keras_cv/layers/preprocessing/channel_shuffle_test.py b/keras_cv/layers/preprocessing/channel_shuffle_test.py index fc41dbcce0..ad31cbaba7 100644 --- a/keras_cv/layers/preprocessing/channel_shuffle_test.py +++ b/keras_cv/layers/preprocessing/channel_shuffle_test.py @@ -69,7 +69,9 @@ def test_non_square_image(self): def test_in_tf_function(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0), + tf.stack( + [2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0 + ), dtype=tf.float32, ) diff --git a/keras_cv/layers/preprocessing/cut_mix.py b/keras_cv/layers/preprocessing/cut_mix.py index 4ec1c328dc..a9f27fff99 100644 --- a/keras_cv/layers/preprocessing/cut_mix.py +++ b/keras_cv/layers/preprocessing/cut_mix.py @@ -89,8 +89,12 @@ def _cutmix(self, images, labels): input_shape[2], ) - permutation_order = tf.random.shuffle(tf.range(0, batch_size), seed=self.seed) - lambda_sample = self._sample_from_beta(self.alpha, self.alpha, (batch_size,)) + permutation_order = tf.random.shuffle( + tf.range(0, batch_size), seed=self.seed + ) + lambda_sample = self._sample_from_beta( + self.alpha, self.alpha, (batch_size,) + ) ratio = tf.math.sqrt(1 - lambda_sample) diff --git a/keras_cv/layers/preprocessing/cut_mix_test.py b/keras_cv/layers/preprocessing/cut_mix_test.py index a688a78b34..288480e08c 100644 --- a/keras_cv/layers/preprocessing/cut_mix_test.py +++ b/keras_cv/layers/preprocessing/cut_mix_test.py @@ -81,7 +81,9 @@ def test_cut_mix_call_results_one_channel(self): def test_in_tf_function(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0), + tf.stack( + [2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0 + ), tf.float32, ) ys = tf.one_hot(tf.constant([0, 1]), 2) @@ -126,7 +128,9 @@ def test_int_labels(self): ys = tf.one_hot(tf.constant([1, 0]), 2, dtype=tf.int32) inputs = {"images": xs, "labels": ys} layer = CutMix() - with self.assertRaisesRegexp(ValueError, "CutMix received labels with type"): + with self.assertRaisesRegexp( + ValueError, "CutMix received labels with type" + ): _ = layer(inputs) def test_image_input(self): diff --git a/keras_cv/layers/preprocessing/equalization.py b/keras_cv/layers/preprocessing/equalization.py index c767a22aef..d856e80a05 100644 --- a/keras_cv/layers/preprocessing/equalization.py +++ b/keras_cv/layers/preprocessing/equalization.py @@ -73,9 +73,9 @@ def equalize_channel(self, image, channel_index): histogram, ) - step = (tf.reduce_sum(histogram) - tf.reduce_min(histogram_without_zeroes)) // ( - self.bins - 1 - ) + step = ( + tf.reduce_sum(histogram) - tf.reduce_min(histogram_without_zeroes) + ) // (self.bins - 1) def build_mapping(histogram, step): # Compute the cumulative sum, shifting by step // 2 @@ -120,7 +120,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/keras_cv/layers/preprocessing/fourier_mix.py b/keras_cv/layers/preprocessing/fourier_mix.py index 1b92f25bd7..609a2f7346 100644 --- a/keras_cv/layers/preprocessing/fourier_mix.py +++ b/keras_cv/layers/preprocessing/fourier_mix.py @@ -85,7 +85,9 @@ def _apply_fftfreq(self, h, w): def 
_get_spectrum(self, freqs, decay_power, channel, h, w): # Function to apply a low pass filter by decaying its high frequency components. scale = tf.ones(1) / tf.cast( - tf.math.maximum(freqs, tf.convert_to_tensor([1 / tf.reduce_max([w, h])])) + tf.math.maximum( + freqs, tf.convert_to_tensor([1 / tf.reduce_max([w, h])]) + ) ** decay_power, tf.float32, ) @@ -116,7 +118,9 @@ def _binarise_mask(self, mask, lam, in_shape): # Create the final mask from the sampled values. idx = tf.argsort(tf.reshape(mask, [-1]), direction="DESCENDING") mask = tf.reshape(mask, [-1]) - num = tf.cast(tf.math.round(lam * tf.cast(tf.size(mask), tf.float32)), tf.int32) + num = tf.cast( + tf.math.round(lam * tf.cast(tf.size(mask), tf.float32)), tf.int32 + ) updates = tf.concat( [ @@ -144,7 +148,9 @@ def _batch_augment(self, inputs): ) images, lambda_sample, permutation_order = self._fourier_mix(images) if labels is not None: - labels = self._update_labels(labels, lambda_sample, permutation_order) + labels = self._update_labels( + labels, lambda_sample, permutation_order + ) inputs["labels"] = labels inputs["images"] = images return inputs @@ -158,19 +164,27 @@ def _augment(self, inputs): def _fourier_mix(self, images): shape = tf.shape(images) - permutation_order = tf.random.shuffle(tf.range(0, shape[0]), seed=self.seed) + permutation_order = tf.random.shuffle( + tf.range(0, shape[0]), seed=self.seed + ) - lambda_sample = self._sample_from_beta(self.alpha, self.alpha, (shape[0],)) + lambda_sample = self._sample_from_beta( + self.alpha, self.alpha, (shape[0],) + ) # generate masks utilizing mapped calls masks = tf.map_fn( - lambda x: self._sample_mask_from_transform(self.decay_power, shape[1:-1]), + lambda x: self._sample_mask_from_transform( + self.decay_power, shape[1:-1] + ), tf.range(shape[0], dtype=tf.float32), ) # binarise masks utilizing mapped calls masks = tf.map_fn( - lambda i: self._binarise_mask(masks[i], lambda_sample[i], shape[1:-1]), + lambda i: self._binarise_mask( + masks[i], lambda_sample[i], shape[1:-1] + ), tf.range(shape[0], dtype=tf.int32), fn_output_signature=tf.float32, ) @@ -187,10 +201,14 @@ def _update_labels(self, labels, lambda_sample, permutation_order): # for broadcasting batch_size = tf.expand_dims(tf.shape(labels)[0], -1) labels_rank = tf.rank(labels) - broadcast_shape = tf.concat([batch_size, tf.ones(labels_rank - 1, tf.int32)], 0) + broadcast_shape = tf.concat( + [batch_size, tf.ones(labels_rank - 1, tf.int32)], 0 + ) lambda_sample = tf.reshape(lambda_sample, broadcast_shape) - labels = lambda_sample * labels + (1.0 - lambda_sample) * labels_for_fmix + labels = ( + lambda_sample * labels + (1.0 - lambda_sample) * labels_for_fmix + ) return labels def get_config(self): diff --git a/keras_cv/layers/preprocessing/fourier_mix_test.py b/keras_cv/layers/preprocessing/fourier_mix_test.py index 79f95260aa..21e10091d5 100644 --- a/keras_cv/layers/preprocessing/fourier_mix_test.py +++ b/keras_cv/layers/preprocessing/fourier_mix_test.py @@ -87,11 +87,15 @@ def augment(x, y): def test_image_input_only(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0), + tf.stack( + [2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0 + ), tf.float32, ) layer = FourierMix() - with self.assertRaisesRegexp(ValueError, "expects inputs in a dictionary"): + with self.assertRaisesRegexp( + ValueError, "expects inputs in a dictionary" + ): _ = layer(xs) def test_single_image_input(self): diff --git a/keras_cv/layers/preprocessing/grayscale.py 
b/keras_cv/layers/preprocessing/grayscale.py index 7853d84dd0..c9889c36a3 100644 --- a/keras_cv/layers/preprocessing/grayscale.py +++ b/keras_cv/layers/preprocessing/grayscale.py @@ -69,7 +69,9 @@ def compute_ragged_image_signature(self, images): return ragged_spec def augment_ragged_image(self, image, transformation, **kwargs): - return self.augment_images(image, transformations=transformation, **kwargs) + return self.augment_images( + image, transformations=transformation, **kwargs + ) def augment_images(self, images, transformations=None, **kwargs): grayscale = tf.image.rgb_to_grayscale(images) @@ -86,7 +88,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_labels(self, labels, transformations=None, **kwargs): return labels - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): return segmentation_masks def get_config(self): diff --git a/keras_cv/layers/preprocessing/grid_mask.py b/keras_cv/layers/preprocessing/grid_mask.py index 560416d20d..43d300ba59 100644 --- a/keras_cv/layers/preprocessing/grid_mask.py +++ b/keras_cv/layers/preprocessing/grid_mask.py @@ -242,7 +242,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/keras_cv/layers/preprocessing/grid_mask_test.py b/keras_cv/layers/preprocessing/grid_mask_test.py index 91e7fdc103..654d5334a7 100644 --- a/keras_cv/layers/preprocessing/grid_mask_test.py +++ b/keras_cv/layers/preprocessing/grid_mask_test.py @@ -78,7 +78,9 @@ def test_non_square_image(self): def test_in_tf_function(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0), + tf.stack( + [2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0 + ), dtype=tf.float32, ) @@ -108,7 +110,9 @@ def test_in_single_image(self): dtype=tf.float32, ) - layer = GridMask(ratio_factor=(0.5, 0.5), fill_mode="constant", fill_value=0.0) + layer = GridMask( + ratio_factor=(0.5, 0.5), fill_mode="constant", fill_value=0.0 + ) xs = layer(xs, training=True) self.assertTrue(tf.math.reduce_any(xs == 0.0)) self.assertTrue(tf.math.reduce_any(xs == 1.0)) diff --git a/keras_cv/layers/preprocessing/jittered_resize_test.py b/keras_cv/layers/preprocessing/jittered_resize_test.py index ef8c5eb448..e31a72120b 100644 --- a/keras_cv/layers/preprocessing/jittered_resize_test.py +++ b/keras_cv/layers/preprocessing/jittered_resize_test.py @@ -57,7 +57,9 @@ def test_augment_bounding_box_single(self): seed=self.seed, ) output = layer(input, training=True) - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) expected_output = { "boxes": tf.convert_to_tensor([[0, 0, 1, 1]], dtype=tf.float32), "classes": tf.convert_to_tensor([0], dtype=tf.float32), @@ -91,7 +93,9 @@ def test_augment_boxes_batched_input(self): seed=self.seed, ) output = layer(input, training=True) - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) expected_output = { "classes": tf.convert_to_tensor([[0, 0], [0, 0]], 
dtype=tf.float32), "boxes": tf.convert_to_tensor( diff --git a/keras_cv/layers/preprocessing/maybe_apply.py b/keras_cv/layers/preprocessing/maybe_apply.py index fcf5ec907e..b2da9826f7 100644 --- a/keras_cv/layers/preprocessing/maybe_apply.py +++ b/keras_cv/layers/preprocessing/maybe_apply.py @@ -99,7 +99,9 @@ def __init__( super().__init__(seed=seed, **kwargs) if not (0 <= rate <= 1.0): - raise ValueError(f"rate must be in range [0, 1]. Received rate: {rate}") + raise ValueError( + f"rate must be in range [0, 1]. Received rate: {rate}" + ) self._layer = layer self._rate = rate @@ -108,7 +110,9 @@ def __init__( self.seed = seed def _should_augment(self): - return self._random_generator.random_uniform(shape=()) > 1.0 - self._rate + return ( + self._random_generator.random_uniform(shape=()) > 1.0 - self._rate + ) def _batch_augment(self, inputs): if self.batchwise: diff --git a/keras_cv/layers/preprocessing/mix_up.py b/keras_cv/layers/preprocessing/mix_up.py index 819f31fea7..8def7d63d0 100644 --- a/keras_cv/layers/preprocessing/mix_up.py +++ b/keras_cv/layers/preprocessing/mix_up.py @@ -67,7 +67,9 @@ def _batch_augment(self, inputs): bounding_boxes = inputs.get("bounding_boxes", None) images, lambda_sample, permutation_order = self._mixup(images) if labels is not None: - labels = self._update_labels(labels, lambda_sample, permutation_order) + labels = self._update_labels( + labels, lambda_sample, permutation_order + ) inputs["labels"] = labels if bounding_boxes is not None: bounding_boxes = self._update_bounding_boxes( @@ -86,9 +88,13 @@ def _augment(self, inputs): def _mixup(self, images): batch_size = tf.shape(images)[0] - permutation_order = tf.random.shuffle(tf.range(0, batch_size), seed=self.seed) + permutation_order = tf.random.shuffle( + tf.range(0, batch_size), seed=self.seed + ) - lambda_sample = self._sample_from_beta(self.alpha, self.alpha, (batch_size,)) + lambda_sample = self._sample_from_beta( + self.alpha, self.alpha, (batch_size,) + ) lambda_sample = tf.cast( tf.reshape(lambda_sample, [-1, 1, 1, 1]), dtype=self.compute_dtype ) @@ -106,7 +112,9 @@ def _update_labels(self, labels, lambda_sample, permutation_order): lambda_sample = tf.reshape(lambda_sample, [-1, 1]) - labels = lambda_sample * labels + (1.0 - lambda_sample) * labels_for_mixup + labels = ( + lambda_sample * labels + (1.0 - lambda_sample) * labels_for_mixup + ) return labels diff --git a/keras_cv/layers/preprocessing/mix_up_test.py b/keras_cv/layers/preprocessing/mix_up_test.py index 5d1d4b022e..dbeb1ddce8 100644 --- a/keras_cv/layers/preprocessing/mix_up_test.py +++ b/keras_cv/layers/preprocessing/mix_up_test.py @@ -35,7 +35,11 @@ def test_return_shapes(self): layer = MixUp() # mixup on labels outputs = layer( - {"images": xs, "labels": ys_labels, "bounding_boxes": ys_bounding_boxes} + { + "images": xs, + "labels": ys_labels, + "bounding_boxes": ys_bounding_boxes, + } ) xs, ys_labels, ys_bounding_boxes = ( outputs["images"], @@ -99,11 +103,15 @@ def augment(x, y): def test_image_input_only(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0), + tf.stack( + [2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0 + ), tf.float32, ) layer = MixUp() - with self.assertRaisesRegexp(ValueError, "expects inputs in a dictionary"): + with self.assertRaisesRegexp( + ValueError, "expects inputs in a dictionary" + ): _ = layer(xs) def test_single_image_input(self): @@ -121,7 +129,9 @@ def test_int_labels(self): ys = tf.one_hot(tf.constant([1, 0]), 2, dtype=tf.int32) inputs = 
{"images": xs, "labels": ys} layer = MixUp() - with self.assertRaisesRegexp(ValueError, "MixUp received labels with type"): + with self.assertRaisesRegexp( + ValueError, "MixUp received labels with type" + ): _ = layer(inputs) def test_image_input(self): diff --git a/keras_cv/layers/preprocessing/mosaic.py b/keras_cv/layers/preprocessing/mosaic.py index 1e44df1c10..93c0f2f4c9 100644 --- a/keras_cv/layers/preprocessing/mosaic.py +++ b/keras_cv/layers/preprocessing/mosaic.py @@ -93,7 +93,8 @@ def _batch_augment(self, inputs): ) # concatenate the batches with permutation order to get all 4 images of the mosaic permutation_order = tf.concat( - [tf.expand_dims(tf.range(batch_size), axis=-1), permutation_order], axis=-1 + [tf.expand_dims(tf.range(batch_size), axis=-1), permutation_order], + axis=-1, ) input_height, input_width, _ = images.shape[1:] @@ -106,7 +107,8 @@ def _batch_augment(self, inputs): ) mosaic_centers_y = ( self.center_sampler( - shape=tf.expand_dims(batch_size, axis=0), dtype=self.compute_dtype + shape=tf.expand_dims(batch_size, axis=0), + dtype=self.compute_dtype, ) * input_height ) @@ -211,7 +213,9 @@ def _update_image(self, images, permutation_order, mosaic_centers, index): output = tf.cast(output, self.compute_dtype) return tf.squeeze(output) - def _update_label(self, images, labels, permutation_order, mosaic_centers, index): + def _update_label( + self, images, labels, permutation_order, mosaic_centers, index + ): # updates labels for one output mosaic input_height, input_width, _ = images.shape[1:] labels_for_mosaic = tf.gather(labels, permutation_order[index]) @@ -236,7 +240,13 @@ def _update_label(self, images, labels, permutation_order, mosaic_centers, index return label def _update_bounding_box( - self, images, bounding_boxes, permutation_order, translate_x, translate_y, index + self, + images, + bounding_boxes, + permutation_order, + translate_x, + translate_y, + index, ): # updates bounding_boxes for one output mosaic bounding_boxes = bounding_box.convert_format( @@ -273,7 +283,10 @@ def _update_bounding_box( ], ) - boxes_for_mosaic = {"boxes": boxes_for_mosaic, "classes": classes_for_mosaic} + boxes_for_mosaic = { + "boxes": boxes_for_mosaic, + "classes": classes_for_mosaic, + } boxes_for_mosaic = bounding_box.clip_to_image( boxes_for_mosaic, bounding_box_format="xyxy", diff --git a/keras_cv/layers/preprocessing/mosaic_test.py b/keras_cv/layers/preprocessing/mosaic_test.py index e85fcdbf7a..8bfebf96b0 100644 --- a/keras_cv/layers/preprocessing/mosaic_test.py +++ b/keras_cv/layers/preprocessing/mosaic_test.py @@ -34,7 +34,11 @@ def test_return_shapes(self): layer = Mosaic(bounding_box_format="xywh") # mosaic on labels outputs = layer( - {"images": xs, "labels": ys_labels, "bounding_boxes": ys_bounding_boxes} + { + "images": xs, + "labels": ys_labels, + "bounding_boxes": ys_bounding_boxes, + } ) xs, ys_labels, ys_bounding_boxes = ( outputs["images"], @@ -71,11 +75,15 @@ def augment(x, y): def test_image_input_only(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0), + tf.stack( + [2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0 + ), tf.float32, ) layer = Mosaic() - with self.assertRaisesRegexp(ValueError, "expects inputs in a dictionary"): + with self.assertRaisesRegexp( + ValueError, "expects inputs in a dictionary" + ): _ = layer(xs) def test_single_image_input(self): @@ -93,7 +101,9 @@ def test_int_labels(self): ys = tf.one_hot(tf.constant([1, 0]), 2, dtype=tf.int32) inputs = {"images": xs, "labels": ys} layer 
= Mosaic() - with self.assertRaisesRegexp(ValueError, "Mosaic received labels with type"): + with self.assertRaisesRegexp( + ValueError, "Mosaic received labels with type" + ): _ = layer(inputs) def test_image_input(self): diff --git a/keras_cv/layers/preprocessing/posterization.py b/keras_cv/layers/preprocessing/posterization.py index 1a4b5a6cef..50a825ecc6 100644 --- a/keras_cv/layers/preprocessing/posterization.py +++ b/keras_cv/layers/preprocessing/posterization.py @@ -68,7 +68,9 @@ def __init__(self, value_range, bits, **kwargs): ) if not (0 < bits < 9): - raise ValueError(f"Bits value must be between 1-8. Received bits: {bits}.") + raise ValueError( + f"Bits value must be between 1-8. Received bits: {bits}." + ) self._shift = 8 - bits self._value_range = value_range @@ -94,7 +96,9 @@ def augment_image(self, image, **kwargs): def augment_bounding_boxes(self, bounding_boxes, **kwargs): return bounding_boxes - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def _batch_augment(self, inputs): diff --git a/keras_cv/layers/preprocessing/posterization_test.py b/keras_cv/layers/preprocessing/posterization_test.py index cc7cf9f6d8..eab153f188 100644 --- a/keras_cv/layers/preprocessing/posterization_test.py +++ b/keras_cv/layers/preprocessing/posterization_test.py @@ -43,12 +43,16 @@ def test_single_image(self): self.assertAllEqual(output, expected_output) def _get_random_bits(self): - return int(self.rng.uniform(shape=(), minval=1, maxval=9, dtype=tf.int32)) + return int( + self.rng.uniform(shape=(), minval=1, maxval=9, dtype=tf.int32) + ) def test_single_image_rescaled(self): bits = self._get_random_bits() dummy_input = self.rng.uniform(shape=(224, 224, 3), maxval=1.0) - expected_output = self._calc_expected_output(dummy_input * 255, bits=bits) / 255 + expected_output = ( + self._calc_expected_output(dummy_input * 255, bits=bits) / 255 + ) layer = Posterization(bits=bits, value_range=[0, 1]) output = layer(dummy_input) diff --git a/keras_cv/layers/preprocessing/rand_augment.py b/keras_cv/layers/preprocessing/rand_augment.py index 76d3f04d92..a2ec26d1c8 100644 --- a/keras_cv/layers/preprocessing/rand_augment.py +++ b/keras_cv/layers/preprocessing/rand_augment.py @@ -100,7 +100,11 @@ def __init__( super().__init__( layers=RandAugment.get_standard_policy( - (0, 255), magnitude, magnitude_stddev, geometric=geometric, seed=seed + (0, 255), + magnitude, + magnitude_stddev, + geometric=geometric, + seed=seed, ), augmentations_per_image=augmentations_per_image, rate=rate, @@ -141,8 +145,12 @@ def get_standard_policy( **policy["solarize"], value_range=value_range, seed=seed ) - color = cv_preprocessing.RandomColorDegeneration(**policy["color"], seed=seed) - contrast = cv_preprocessing.RandomContrast(**policy["contrast"], seed=seed) + color = cv_preprocessing.RandomColorDegeneration( + **policy["color"], seed=seed + ) + contrast = cv_preprocessing.RandomContrast( + **policy["contrast"], seed=seed + ) brightness = cv_preprocessing.RandomBrightness( **policy["brightness"], value_range=value_range, seed=seed ) @@ -157,8 +165,12 @@ def get_standard_policy( ] if geometric: - shear_x = cv_preprocessing.RandomShear(**policy["shear_x"], seed=seed) - shear_y = cv_preprocessing.RandomShear(**policy["shear_y"], seed=seed) + shear_x = cv_preprocessing.RandomShear( + **policy["shear_x"], seed=seed + ) + shear_y = cv_preprocessing.RandomShear( + **policy["shear_y"], 
seed=seed + ) translate_x = cv_preprocessing.RandomTranslation( **policy["translate_x"], seed=seed ) @@ -210,7 +222,10 @@ def solarize_policy(magnitude, magnitude_stddev): min_value=0, max_value=255, ) - return {"addition_factor": addition_factor, "threshold_factor": threshold_factor} + return { + "addition_factor": addition_factor, + "threshold_factor": threshold_factor, + } def color_policy(magnitude, magnitude_stddev): diff --git a/keras_cv/layers/preprocessing/rand_augment_test.py b/keras_cv/layers/preprocessing/rand_augment_test.py index 7217284716..4d55867768 100644 --- a/keras_cv/layers/preprocessing/rand_augment_test.py +++ b/keras_cv/layers/preprocessing/rand_augment_test.py @@ -39,11 +39,16 @@ def test_runs_with_magnitude(self, magnitude): ) def test_runs_with_value_range(self, low, high): rand_augment = layers.RandAugment( - augmentations_per_image=3, magnitude=0.5, rate=1.0, value_range=(low, high) + augmentations_per_image=3, + magnitude=0.5, + rate=1.0, + value_range=(low, high), ) xs = tf.random.uniform((2, 512, 512, 3), low, high, dtype=tf.float32) ys = rand_augment(xs) - self.assertTrue(tf.math.reduce_all(tf.logical_and(ys >= low, ys <= high))) + self.assertTrue( + tf.math.reduce_all(tf.logical_and(ys >= low, ys <= high)) + ) @parameterized.named_parameters( ("float32", tf.float32), @@ -75,7 +80,10 @@ def test_standard_policy_respects_value_range(self, lower, upper): def test_runs_unbatched(self): rand_augment = layers.RandAugment( - augmentations_per_image=3, magnitude=0.5, rate=1.0, value_range=(0, 255) + augmentations_per_image=3, + magnitude=0.5, + rate=1.0, + value_range=(0, 255), ) xs = tf.random.uniform((512, 512, 3), 0, 255, dtype=tf.float32) ys = rand_augment(xs) @@ -90,8 +98,15 @@ def test_runs_no_geo(self): value_range=(0, 255), ) self.assertFalse( - any([isinstance(x, layers.RandomTranslation) for x in rand_augment.layers]) + any( + [ + isinstance(x, layers.RandomTranslation) + for x in rand_augment.layers + ] + ) ) self.assertFalse( - any([isinstance(x, layers.RandomShear) for x in rand_augment.layers]) + any( + [isinstance(x, layers.RandomShear) for x in rand_augment.layers] + ) ) diff --git a/keras_cv/layers/preprocessing/random_aspect_ratio.py b/keras_cv/layers/preprocessing/random_aspect_ratio.py index 9aabc38a66..d9e47b9c21 100644 --- a/keras_cv/layers/preprocessing/random_aspect_ratio.py +++ b/keras_cv/layers/preprocessing/random_aspect_ratio.py @@ -46,7 +46,11 @@ def __init__( super().__init__(**kwargs) self.interpolation = keras_cv.utils.get_interpolation(interpolation) self.factor = keras_cv.utils.parse_factor( - factor, min_value=0.0, max_value=None, seed=seed, param_name="factor" + factor, + min_value=0.0, + max_value=None, + seed=seed, + param_name="factor", ) self.bounding_box_format = bounding_box_format self.seed = seed @@ -63,7 +67,9 @@ def compute_image_signature(self, images): dtype=self.compute_dtype, ) - def augment_bounding_boxes(self, bounding_boxes, transformation, image, **kwargs): + def augment_bounding_boxes( + self, bounding_boxes, transformation, image, **kwargs + ): if self.bounding_box_format is None: raise ValueError( "Please provide a `bounding_box_format` when augmenting " @@ -105,7 +111,9 @@ def augment_image(self, image, transformation, **kwargs): width = width * transformation target_size = tf.cast(tf.stack([height, width]), tf.int32) - result = tf.image.resize(image, size=target_size, method=self.interpolation) + result = tf.image.resize( + image, size=target_size, method=self.interpolation + ) return tf.cast(result, 
self.compute_dtype) def augment_label(self, label, transformation, **kwargs): diff --git a/keras_cv/layers/preprocessing/random_aspect_ratio_test.py b/keras_cv/layers/preprocessing/random_aspect_ratio_test.py index cfe7d72c53..3ad4e268b2 100644 --- a/keras_cv/layers/preprocessing/random_aspect_ratio_test.py +++ b/keras_cv/layers/preprocessing/random_aspect_ratio_test.py @@ -59,6 +59,8 @@ def test_augment_boxes_ragged(self): factor=(0.9, 1.1), bounding_box_format="rel_xywh" ) output = layer(input, training=True) - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) bounding_boxes = bounding_box.to_dense(bounding_boxes) self.assertAllClose(bounding_boxes, output["bounding_boxes"]) diff --git a/keras_cv/layers/preprocessing/random_augmentation_pipeline_test.py b/keras_cv/layers/preprocessing/random_augmentation_pipeline_test.py index e51d71d056..98192f6b58 100644 --- a/keras_cv/layers/preprocessing/random_augmentation_pipeline_test.py +++ b/keras_cv/layers/preprocessing/random_augmentation_pipeline_test.py @@ -29,10 +29,14 @@ def call(self, inputs): class RandomAugmentationPipelineTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters(("1", 1), ("3", 3), ("5", 5)) - def test_calls_layers_augmentations_per_image_times(self, augmentations_per_image): + def test_calls_layers_augmentations_per_image_times( + self, augmentations_per_image + ): layer = AddOneToInputs() pipeline = layers.RandomAugmentationPipeline( - layers=[layer], augmentations_per_image=augmentations_per_image, rate=1.0 + layers=[layer], + augmentations_per_image=augmentations_per_image, + rate=1.0, ) xs = tf.random.uniform((2, 5, 5, 3), 0, 100, dtype=tf.float32) os = pipeline(xs) @@ -69,7 +73,9 @@ def test_calls_layers_augmentations_per_image_times_single_image( ): layer = AddOneToInputs() pipeline = layers.RandomAugmentationPipeline( - layers=[layer], augmentations_per_image=augmentations_per_image, rate=1.0 + layers=[layer], + augmentations_per_image=augmentations_per_image, + rate=1.0, ) xs = tf.random.uniform((5, 5, 3), 0, 100, dtype=tf.float32) os = pipeline(xs) @@ -80,7 +86,9 @@ def test_calls_layers_augmentations_per_image_times_single_image( def test_respects_rate(self, augmentations_per_image): layer = AddOneToInputs() pipeline = layers.RandomAugmentationPipeline( - layers=[layer], augmentations_per_image=augmentations_per_image, rate=0.0 + layers=[layer], + augmentations_per_image=augmentations_per_image, + rate=0.0, ) xs = tf.random.uniform((2, 5, 5, 3), 0, 100, dtype=tf.float32) os = pipeline(xs) diff --git a/keras_cv/layers/preprocessing/random_brightness.py b/keras_cv/layers/preprocessing/random_brightness.py index d8919c6cea..4d39b7ed59 100644 --- a/keras_cv/layers/preprocessing/random_brightness.py +++ b/keras_cv/layers/preprocessing/random_brightness.py @@ -93,12 +93,16 @@ def augment_images(self, images, transformations, **kwargs): ) rgb_deltas = tf.cast(transformations, images.dtype) images += rgb_deltas - return tf.clip_by_value(images, self.value_range[0], self.value_range[1]) + return tf.clip_by_value( + images, self.value_range[0], self.value_range[1] + ) def augment_labels(self, labels, transformations, **kwargs): return labels - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): return segmentation_masks def augment_bounding_boxes(self, 
bounding_boxes, transformations, **kwargs): @@ -116,5 +120,7 @@ def get_config(self): @classmethod def from_config(cls, config): if isinstance(config["factor"], dict): - config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + config["factor"] = tf.keras.utils.deserialize_keras_object( + config["factor"] + ) return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_brightness_test.py b/keras_cv/layers/preprocessing/random_brightness_test.py index c6279d2bb1..4b51b8cf66 100644 --- a/keras_cv/layers/preprocessing/random_brightness_test.py +++ b/keras_cv/layers/preprocessing/random_brightness_test.py @@ -44,16 +44,22 @@ def test_max_brightness(self): layer = preprocessing.RandomBrightness(factor=(1, 1)) output = layer(image) - self.assertAllClose(output, tf.fill((4, 8, 8, 3), 255), atol=1e-5, rtol=1e-5) + self.assertAllClose( + output, tf.fill((4, 8, 8, 3), 255), atol=1e-5, rtol=1e-5 + ) def test_max_brightness_rescaled_value_range(self): image_shape = (4, 8, 8, 3) image = tf.random.uniform(shape=image_shape) - layer = preprocessing.RandomBrightness(value_range=(0, 1), factor=(1, 1)) + layer = preprocessing.RandomBrightness( + value_range=(0, 1), factor=(1, 1) + ) output = layer(image) - self.assertAllClose(output, tf.fill((4, 8, 8, 3), 1), atol=1e-5, rtol=1e-5) + self.assertAllClose( + output, tf.fill((4, 8, 8, 3), 1), atol=1e-5, rtol=1e-5 + ) def test_zero_brightness(self): image_shape = (4, 8, 8, 3) @@ -62,11 +68,15 @@ def test_zero_brightness(self): layer = preprocessing.RandomBrightness(factor=(-1, -1)) output = layer(image) - self.assertAllClose(output, tf.fill((4, 8, 8, 3), 0), atol=1e-5, rtol=1e-5) + self.assertAllClose( + output, tf.fill((4, 8, 8, 3), 0), atol=1e-5, rtol=1e-5 + ) def test_with_unit8(self): image_shape = (4, 8, 8, 3) - image = tf.cast(tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8) + image = tf.cast( + tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8 + ) layer = preprocessing.RandomBrightness(factor=0) output = layer(image) @@ -77,7 +87,9 @@ def test_with_unit8(self): self.assertNotAllClose(image, output) def test_config(self): - layer = preprocessing.RandomBrightness(value_range=(0, 1), factor=(0.3, 0.8)) + layer = preprocessing.RandomBrightness( + value_range=(0, 1), factor=(0.3, 0.8) + ) config = layer.get_config() self.assertTrue(isinstance(config["factor"], core.UniformFactorSampler)) self.assertEqual(config["factor"].get_config()["lower"], 0.3) diff --git a/keras_cv/layers/preprocessing/random_channel_shift.py b/keras_cv/layers/preprocessing/random_channel_shift.py index a4095305cd..fa3cdd1540 100644 --- a/keras_cv/layers/preprocessing/random_channel_shift.py +++ b/keras_cv/layers/preprocessing/random_channel_shift.py @@ -101,7 +101,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/keras_cv/layers/preprocessing/random_channel_shift_test.py b/keras_cv/layers/preprocessing/random_channel_shift_test.py index 04039c0bb1..f92bdb2834 100644 --- a/keras_cv/layers/preprocessing/random_channel_shift_test.py +++ b/keras_cv/layers/preprocessing/random_channel_shift_test.py @@ -22,7 +22,9 @@ class RandomChannelShiftTest(tf.test.TestCase, parameterized.TestCase): def test_return_shapes(self): xs 
= tf.ones((2, 512, 512, 3)) - layer = preprocessing.RandomChannelShift(factor=1.0, value_range=(0, 255)) + layer = preprocessing.RandomChannelShift( + factor=1.0, value_range=(0, 255) + ) xs = layer(xs, training=True) self.assertEqual(xs.shape, [2, 512, 512, 3]) @@ -45,10 +47,14 @@ def test_non_square_image(self): def test_in_tf_function(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 3)), tf.ones((100, 100, 3))], axis=0), + tf.stack( + [2 * tf.ones((100, 100, 3)), tf.ones((100, 100, 3))], axis=0 + ), dtype=tf.float32, ) - layer = preprocessing.RandomChannelShift(factor=0.3, value_range=(0, 255)) + layer = preprocessing.RandomChannelShift( + factor=0.3, value_range=(0, 255) + ) @tf.function def augment(x): @@ -85,7 +91,9 @@ def test_in_single_image(self): tf.ones((512, 512, 3)), dtype=tf.float32, ) - layer = preprocessing.RandomChannelShift(factor=0.4, value_range=(0, 255)) + layer = preprocessing.RandomChannelShift( + factor=0.4, value_range=(0, 255) + ) xs = layer(xs, training=True) self.assertFalse(tf.math.reduce_any(xs == 1.0)) @@ -100,14 +108,18 @@ def test_config(self): self.assertEqual(config["channels"], 3) self.assertEqual(config["seed"], 101) - reconstructed_layer = preprocessing.RandomChannelShift.from_config(config) + reconstructed_layer = preprocessing.RandomChannelShift.from_config( + config + ) self.assertEqual(reconstructed_layer.factor, layer.factor) self.assertEqual(reconstructed_layer.value_range, layer.value_range) self.assertEqual(reconstructed_layer.seed, layer.seed) self.assertEqual(reconstructed_layer.channels, layer.channels) def test_inference(self): - layer = preprocessing.RandomChannelShift(factor=0.8, value_range=(0, 255)) + layer = preprocessing.RandomChannelShift( + factor=0.8, value_range=(0, 255) + ) inputs = np.random.randint(0, 255, size=(224, 224, 3)) output = layer(inputs, training=False) self.assertAllClose(inputs, output) diff --git a/keras_cv/layers/preprocessing/random_choice_test.py b/keras_cv/layers/preprocessing/random_choice_test.py index e743f9270e..8f96ebcacf 100644 --- a/keras_cv/layers/preprocessing/random_choice_test.py +++ b/keras_cv/layers/preprocessing/random_choice_test.py @@ -61,11 +61,15 @@ def test_calls_layer_augmentation_single_image(self): def test_calls_choose_one_layer_augmentation(self): batch_size = 10 - pipeline = layers.RandomChoice(layers=[AddOneToInputs(), AddOneToInputs()]) + pipeline = layers.RandomChoice( + layers=[AddOneToInputs(), AddOneToInputs()] + ) xs = tf.random.uniform((batch_size, 5, 5, 3), 0, 100, dtype=tf.float32) os = pipeline(xs) self.assertAllClose(xs + 1, os) - total_calls = pipeline.layers[0].call_counter + pipeline.layers[1].call_counter + total_calls = ( + pipeline.layers[0].call_counter + pipeline.layers[1].call_counter + ) self.assertEqual(total_calls, batch_size) diff --git a/keras_cv/layers/preprocessing/random_color_degeneration.py b/keras_cv/layers/preprocessing/random_color_degeneration.py index 47717616d3..f1d5e25012 100644 --- a/keras_cv/layers/preprocessing/random_color_degeneration.py +++ b/keras_cv/layers/preprocessing/random_color_degeneration.py @@ -68,7 +68,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): @@ -79,5 +81,7 @@ def get_config(self): @classmethod def 
from_config(cls, config): if isinstance(config["factor"], dict): - config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + config["factor"] = tf.keras.utils.deserialize_keras_object( + config["factor"] + ) return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_color_jitter.py b/keras_cv/layers/preprocessing/random_color_jitter.py index 6b2051bb23..0f455a3473 100644 --- a/keras_cv/layers/preprocessing/random_color_jitter.py +++ b/keras_cv/layers/preprocessing/random_color_jitter.py @@ -140,7 +140,9 @@ def augment_images(self, images, transformations=None, **kwargs): def augment_labels(self, labels, transformations, **kwargs): return labels - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): return segmentation_masks def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): diff --git a/keras_cv/layers/preprocessing/random_color_jitter_test.py b/keras_cv/layers/preprocessing/random_color_jitter_test.py index 9f16fe0ecf..4405189999 100644 --- a/keras_cv/layers/preprocessing/random_color_jitter_test.py +++ b/keras_cv/layers/preprocessing/random_color_jitter_test.py @@ -91,10 +91,18 @@ def test_config(self): self.assertEqual(config["saturation_factor"], (0.5, 0.9)) self.assertEqual(config["hue_factor"], 0.5) - reconstructed_layer = preprocessing.RandomColorJitter.from_config(config) - self.assertEqual(reconstructed_layer.brightness_factor, layer.brightness_factor) - self.assertEqual(reconstructed_layer.contrast_factor, layer.contrast_factor) - self.assertEqual(reconstructed_layer.saturation_factor, layer.saturation_factor) + reconstructed_layer = preprocessing.RandomColorJitter.from_config( + config + ) + self.assertEqual( + reconstructed_layer.brightness_factor, layer.brightness_factor + ) + self.assertEqual( + reconstructed_layer.contrast_factor, layer.contrast_factor + ) + self.assertEqual( + reconstructed_layer.saturation_factor, layer.saturation_factor + ) self.assertEqual(reconstructed_layer.hue_factor, layer.hue_factor) # Test 5: Check if inference model is OK. 
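The `from_config` hunks above (RandomBrightness, RandomColorDegeneration, and the RandomColorJitter test) only re-wrap the branch that turns a dict-encoded `factor` back into a sampler object. A minimal, hedged sketch of that round trip follows; `DemoFactor` and `DemoLayer` are hypothetical stand-ins for a KerasCV factor sampler and preprocessing layer, not library code:

    import tensorflow as tf

    @tf.keras.utils.register_keras_serializable(package="demo")
    class DemoFactor:
        """Hypothetical stand-in for a factor sampler with a get_config()."""
        def __init__(self, lower, upper):
            self.lower, self.upper = lower, upper
        def get_config(self):
            return {"lower": self.lower, "upper": self.upper}
        @classmethod
        def from_config(cls, config):
            return cls(**config)

    @tf.keras.utils.register_keras_serializable(package="demo")
    class DemoLayer(tf.keras.layers.Layer):
        def __init__(self, factor, **kwargs):
            super().__init__(**kwargs)
            self.factor = factor
        def get_config(self):
            config = super().get_config()
            # Serialization turns the sampler into a plain dict.
            config.update(
                {"factor": tf.keras.utils.serialize_keras_object(self.factor)}
            )
            return config
        @classmethod
        def from_config(cls, config):
            # Without this branch, `factor` would come back as a dict, not a sampler.
            if isinstance(config["factor"], dict):
                config["factor"] = tf.keras.utils.deserialize_keras_object(
                    config["factor"]
                )
            return cls(**config)

    layer = DemoLayer(factor=DemoFactor(0.3, 0.8))
    restored = DemoLayer.from_config(layer.get_config())
    assert isinstance(restored.factor, DemoFactor)

This is the same pattern the serialization tests in this patch series exercise: config in, equivalent layer out.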
diff --git a/keras_cv/layers/preprocessing/random_contrast.py b/keras_cv/layers/preprocessing/random_contrast.py index b149a9fce0..2003678b20 100644 --- a/keras_cv/layers/preprocessing/random_contrast.py +++ b/keras_cv/layers/preprocessing/random_contrast.py @@ -86,7 +86,9 @@ def augment_ragged_image(self, image, transformation, **kwargs): def augment_images(self, images, transformations, **kwargs): contrast_factors = tf.cast(transformations, dtype=images.dtype) # broadcast - contrast_factors = contrast_factors[..., tf.newaxis, tf.newaxis, tf.newaxis] + contrast_factors = contrast_factors[ + ..., tf.newaxis, tf.newaxis, tf.newaxis + ] means = tf.reduce_mean(images, axis=(1, 2), keepdims=True) images = (images - means) * contrast_factors + means @@ -96,7 +98,9 @@ def augment_images(self, images, transformations, **kwargs): def augment_labels(self, labels, transformations, **kwargs): return labels - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): return segmentation_masks def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): diff --git a/keras_cv/layers/preprocessing/random_contrast_test.py b/keras_cv/layers/preprocessing/random_contrast_test.py index b20cf6f571..9e1cd1d9b4 100644 --- a/keras_cv/layers/preprocessing/random_contrast_test.py +++ b/keras_cv/layers/preprocessing/random_contrast_test.py @@ -38,7 +38,9 @@ def test_no_adjustment_for_factor_zero(self): def test_with_unit8(self): image_shape = (4, 8, 8, 3) - image = tf.cast(tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8) + image = tf.cast( + tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8 + ) layer = preprocessing.RandomContrast(factor=0) output = layer(image) diff --git a/keras_cv/layers/preprocessing/random_crop.py b/keras_cv/layers/preprocessing/random_crop.py index 3e33449e43..589a311b88 100644 --- a/keras_cv/layers/preprocessing/random_crop.py +++ b/keras_cv/layers/preprocessing/random_crop.py @@ -52,8 +52,12 @@ class RandomCrop(BaseImageAugmentationLayer): seed: Integer. Used to create a random seed. 
""" - def __init__(self, height, width, seed=None, bounding_box_format=None, **kwargs): - super().__init__(**kwargs, autocast=False, seed=seed, force_generator=True) + def __init__( + self, height, width, seed=None, bounding_box_format=None, **kwargs + ): + super().__init__( + **kwargs, autocast=False, seed=seed, force_generator=True + ) self.height = height self.width = width self.seed = seed @@ -107,7 +111,9 @@ def augment_bounding_boxes( w_diff = image_shape[W_AXIS] - self.width bounding_boxes = tf.cond( tf.reduce_all((h_diff >= 0, w_diff >= 0)), - lambda: self._crop_bounding_boxes(image, bounding_boxes, transformation), + lambda: self._crop_bounding_boxes( + image, bounding_boxes, transformation + ), lambda: self._resize_bounding_boxes( image, bounding_boxes, @@ -130,7 +136,9 @@ def augment_bounding_boxes( def _crop(self, image, transformation): top = transformation["top"] left = transformation["left"] - return tf.image.crop_to_bounding_box(image, top, left, self.height, self.width) + return tf.image.crop_to_bounding_box( + image, top, left, self.height, self.width + ) def _resize(self, image): resizing_layer = tf.keras.layers.Resizing(self.height, self.width) @@ -155,7 +163,9 @@ def _crop_bounding_boxes(self, image, bounding_boxes, transformation): top = tf.cast(transformation["top"], dtype=self.compute_dtype) left = tf.cast(transformation["left"], dtype=self.compute_dtype) output = bounding_boxes.copy() - x1, y1, x2, y2 = tf.split(bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1) + x1, y1, x2, y2 = tf.split( + bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1 + ) output["boxes"] = tf.concat( [ x1 - left, @@ -170,9 +180,15 @@ def _crop_bounding_boxes(self, image, bounding_boxes, transformation): def _resize_bounding_boxes(self, image, bounding_boxes): output = bounding_boxes.copy() image_shape = tf.shape(image) - x_scale = tf.cast(self.width / image_shape[W_AXIS], dtype=self.compute_dtype) - y_scale = tf.cast(self.height / image_shape[H_AXIS], dtype=self.compute_dtype) - x1, y1, x2, y2 = tf.split(bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1) + x_scale = tf.cast( + self.width / image_shape[W_AXIS], dtype=self.compute_dtype + ) + y_scale = tf.cast( + self.height / image_shape[H_AXIS], dtype=self.compute_dtype + ) + x1, y1, x2, y2 = tf.split( + bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1 + ) output["boxes"] = tf.concat( [ x1 * x_scale, diff --git a/keras_cv/layers/preprocessing/random_crop_and_resize.py b/keras_cv/layers/preprocessing/random_crop_and_resize.py index 0af9e5c692..45379ab9df 100644 --- a/keras_cv/layers/preprocessing/random_crop_and_resize.py +++ b/keras_cv/layers/preprocessing/random_crop_and_resize.py @@ -70,7 +70,9 @@ def __init__( ): super().__init__(seed=seed, **kwargs) - self._check_class_arguments(target_size, crop_area_factor, aspect_ratio_factor) + self._check_class_arguments( + target_size, crop_area_factor, aspect_ratio_factor + ) self.target_size = target_size self.aspect_ratio_factor = preprocessing.parse_factor( aspect_ratio_factor, @@ -100,7 +102,9 @@ def get_random_transformation( new_height = tf.clip_by_value( tf.sqrt(crop_area_factor / aspect_ratio), 0.0, 1.0 ) # to avoid unwanted/unintuitive effects - new_width = tf.clip_by_value(tf.sqrt(crop_area_factor * aspect_ratio), 0.0, 1.0) + new_width = tf.clip_by_value( + tf.sqrt(crop_area_factor * aspect_ratio), 0.0, 1.0 + ) height_offset = self._random_generator.random_uniform( (), @@ -158,7 +162,9 @@ def _transform_bounding_boxes(bounding_boxes, transformation): t_y1, t_x1, t_y2, t_x2 = transformation[0] t_dx 
= t_x2 - t_x1 t_dy = t_y2 - t_y1 - x1, y1, x2, y2 = tf.split(bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1) + x1, y1, x2, y2 = tf.split( + bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1 + ) output = tf.concat( [ (x1 - t_x1) / t_dx, @@ -241,7 +247,9 @@ def _check_class_arguments( ) if ( - not isinstance(aspect_ratio_factor, (tuple, list, core.FactorSampler)) + not isinstance( + aspect_ratio_factor, (tuple, list, core.FactorSampler) + ) or isinstance(aspect_ratio_factor, float) or isinstance(aspect_ratio_factor, int) ): @@ -251,7 +259,9 @@ def _check_class_arguments( f"aspect_ratio_factor={aspect_ratio_factor}" ) - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return self._crop_and_resize( segmentation_mask, transformation, method="nearest" ) @@ -273,11 +283,15 @@ def get_config(self): @classmethod def from_config(cls, config): if isinstance(config["crop_area_factor"], dict): - config["crop_area_factor"] = tf.keras.utils.deserialize_keras_object( + config[ + "crop_area_factor" + ] = tf.keras.utils.deserialize_keras_object( config["crop_area_factor"] ) if isinstance(config["aspect_ratio_factor"], dict): - config["aspect_ratio_factor"] = tf.keras.utils.deserialize_keras_object( + config[ + "aspect_ratio_factor" + ] = tf.keras.utils.deserialize_keras_object( config["aspect_ratio_factor"] ) return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_crop_and_resize_test.py b/keras_cv/layers/preprocessing/random_crop_and_resize_test.py index 33ea6f96fa..a11393acc1 100644 --- a/keras_cv/layers/preprocessing/random_crop_and_resize_test.py +++ b/keras_cv/layers/preprocessing/random_crop_and_resize_test.py @@ -166,7 +166,9 @@ def test_augment_one_hot_segmentation_mask(self): mask_shape = (1, self.height, self.width, 1) image = tf.random.uniform(shape=input_image_shape, seed=self.seed) mask = tf.one_hot( - tf.squeeze(np.random.randint(2, size=mask_shape) * (classes - 1), axis=-1), + tf.squeeze( + np.random.randint(2, size=mask_shape) * (classes - 1), axis=-1 + ), classes, ) @@ -205,8 +207,12 @@ def test_augment_bounding_box_single(self): "boxes": tf.convert_to_tensor([[0, 0, 1, 1]], dtype=tf.float32), "classes": tf.convert_to_tensor([0], dtype=tf.float32), } - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) - self.assertAllClose(expected_output["boxes"], output["bounding_boxes"]["boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) + self.assertAllClose( + expected_output["boxes"], output["bounding_boxes"]["boxes"] + ) self.assertAllClose( expected_output["classes"], output["bounding_boxes"]["classes"] ) @@ -240,8 +246,12 @@ def test_augment_boxes_batched_input(self): ), "classes": tf.convert_to_tensor([[0, 0], [0, 0]]), } - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) - self.assertAllClose(expected_output["boxes"], output["bounding_boxes"]["boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) + self.assertAllClose( + expected_output["boxes"], output["bounding_boxes"]["boxes"] + ) self.assertAllClose( expected_output["classes"], output["bounding_boxes"]["classes"] ) @@ -271,8 +281,12 @@ def test_augment_boxes_ragged(self): "classes": tf.ragged.constant([[0, 0], [0]]), } expected_output = bounding_box.to_dense(expected_output) - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) - 
self.assertAllClose(expected_output["boxes"], output["bounding_boxes"]["boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) + self.assertAllClose( + expected_output["boxes"], output["bounding_boxes"]["boxes"] + ) self.assertAllClose( expected_output["classes"], output["bounding_boxes"]["classes"] ) diff --git a/keras_cv/layers/preprocessing/random_crop_test.py b/keras_cv/layers/preprocessing/random_crop_test.py index df6eac03a7..415b3af643 100644 --- a/keras_cv/layers/preprocessing/random_crop_test.py +++ b/keras_cv/layers/preprocessing/random_crop_test.py @@ -143,7 +143,9 @@ def test_augment_bounding_boxes_crop(self): "classes": tf.convert_to_tensor([1]), } input = {"images": input_image, "bounding_boxes": bboxes} - layer = RandomCrop(height=100, width=200, bounding_box_format="xyxy", seed=10) + layer = RandomCrop( + height=100, width=200, bounding_box_format="xyxy", seed=10 + ) # for top = 300 and left = 305 output = layer(input) expected_output = np.asarray( diff --git a/keras_cv/layers/preprocessing/random_cutout_test.py b/keras_cv/layers/preprocessing/random_cutout_test.py index 423730d94f..f20b62706b 100644 --- a/keras_cv/layers/preprocessing/random_cutout_test.py +++ b/keras_cv/layers/preprocessing/random_cutout_test.py @@ -44,7 +44,9 @@ def _run_test(self, height_factor, width_factor): def test_return_shapes(self): xs = tf.ones((2, 512, 512, 3)) - layer = preprocessing.RandomCutout(height_factor=0.5, width_factor=0.5, seed=1) + layer = preprocessing.RandomCutout( + height_factor=0.5, width_factor=0.5, seed=1 + ) xs = layer(xs) self.assertEqual(xs.shape, [2, 512, 512, 3]) @@ -52,7 +54,9 @@ def test_return_shapes(self): def test_return_shapes_single_element(self): xs = tf.ones((512, 512, 3)) - layer = preprocessing.RandomCutout(height_factor=0.5, width_factor=0.5, seed=1) + layer = preprocessing.RandomCutout( + height_factor=0.5, width_factor=0.5, seed=1 + ) xs = layer(xs) self.assertEqual(xs.shape, [512, 512, 3]) @@ -122,7 +126,9 @@ def test_random_cutout_call_tiny_image(self): def test_in_tf_function(self): xs = tf.cast( - tf.stack([2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0), + tf.stack( + [2 * tf.ones((100, 100, 1)), tf.ones((100, 100, 1))], axis=0 + ), tf.float32, ) diff --git a/keras_cv/layers/preprocessing/random_flip.py b/keras_cv/layers/preprocessing/random_flip.py index 3acbf1e461..5a67be0bb4 100644 --- a/keras_cv/layers/preprocessing/random_flip.py +++ b/keras_cv/layers/preprocessing/random_flip.py @@ -57,7 +57,9 @@ class RandomFlip(BaseImageAugmentationLayer): for more details on supported bounding box formats. 
""" - def __init__(self, mode=HORIZONTAL, seed=None, bounding_box_format=None, **kwargs): + def __init__( + self, mode=HORIZONTAL, seed=None, bounding_box_format=None, **kwargs + ): super().__init__(seed=seed, force_generator=True, **kwargs) self.mode = mode self.seed = seed @@ -88,9 +90,13 @@ def get_random_transformation(self, **kwargs): flip_horizontal = False flip_vertical = False if self.horizontal: - flip_horizontal = self._random_generator.random_uniform(shape=[]) > 0.5 + flip_horizontal = ( + self._random_generator.random_uniform(shape=[]) > 0.5 + ) if self.vertical: - flip_vertical = self._random_generator.random_uniform(shape=[]) > 0.5 + flip_vertical = ( + self._random_generator.random_uniform(shape=[]) > 0.5 + ) return { "flip_horizontal": tf.cast(flip_horizontal, dtype=tf.bool), "flip_vertical": tf.cast(flip_vertical, dtype=tf.bool), @@ -111,7 +117,9 @@ def _flip_image(image, transformation): return flipped_output def _flip_bounding_boxes_horizontal(bounding_boxes): - x1, x2, x3, x4 = tf.split(bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1) + x1, x2, x3, x4 = tf.split( + bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1 + ) output = tf.stack( [ 1 - x3, @@ -126,7 +134,9 @@ def _flip_bounding_boxes_horizontal(bounding_boxes): return bounding_boxes def _flip_bounding_boxes_vertical(bounding_boxes): - x1, x2, x3, x4 = tf.split(bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1) + x1, x2, x3, x4 = tf.split( + bounding_boxes["boxes"], [1, 1, 1, 1], axis=-1 + ) output = tf.stack( [ x1, diff --git a/keras_cv/layers/preprocessing/random_flip_test.py b/keras_cv/layers/preprocessing/random_flip_test.py index 19a54f336f..e92de89c59 100644 --- a/keras_cv/layers/preprocessing/random_flip_test.py +++ b/keras_cv/layers/preprocessing/random_flip_test.py @@ -37,7 +37,9 @@ def test_horizontal_flip(self): self.assertAllClose(expected_output, actual_output) def test_flip_ragged(self): - images = tf.ragged.stack([tf.ones((512, 512, 3)), tf.ones((1002, 512, 3))]) + images = tf.ragged.stack( + [tf.ones((512, 512, 3)), tf.ones((1002, 512, 3))] + ) bounding_boxes = { "boxes": tf.ragged.stack([tf.ones((5, 4)), tf.ones((3, 4))]), "classes": tf.ragged.stack([tf.ones((5,)), tf.ones((3,))]), @@ -125,7 +127,10 @@ def test_augment_bounding_box_batched_input(self): image = tf.zeros([20, 20, 3]) bounding_boxes = { "boxes": tf.convert_to_tensor( - [[[0, 0, 10, 10], [4, 4, 12, 12]], [[4, 4, 12, 12], [0, 0, 10, 10]]], + [ + [[0, 0, 10, 10], [4, 4, 12, 12]], + [[4, 4, 12, 12], [0, 0, 10, 10]], + ], dtype=tf.float32, ), "classes": tf.convert_to_tensor( @@ -141,7 +146,9 @@ def test_augment_bounding_box_batched_input(self): input = {"images": [image, image], "bounding_boxes": bounding_boxes} mock_random = [0.6, 0.6, 0.6, 0.6] - layer = RandomFlip("horizontal_and_vertical", bounding_box_format="xyxy") + layer = RandomFlip( + "horizontal_and_vertical", bounding_box_format="xyxy" + ) with unittest.mock.patch.object( layer._random_generator, "random_uniform", @@ -166,8 +173,12 @@ def test_augment_bounding_box_batched_input(self): ] ), } - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) - self.assertAllClose(expected_output["boxes"], output["bounding_boxes"]["boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) + self.assertAllClose( + expected_output["boxes"], output["bounding_boxes"]["boxes"] + ) self.assertAllClose( expected_output["classes"], output["bounding_boxes"]["classes"] ) @@ -176,14 +187,17 @@ def test_augment_boxes_ragged(self): image = tf.zeros([2, 
20, 20, 3]) bounding_boxes = { "boxes": tf.ragged.constant( - [[[0, 0, 10, 10], [4, 4, 12, 12]], [[0, 0, 10, 10]]], dtype=tf.float32 + [[[0, 0, 10, 10], [4, 4, 12, 12]], [[0, 0, 10, 10]]], + dtype=tf.float32, ), "classes": tf.ragged.constant([[0, 0], [0]], dtype=tf.float32), } input = {"images": image, "bounding_boxes": bounding_boxes} mock_random = [0.6, 0.6, 0.6, 0.6] - layer = RandomFlip("horizontal_and_vertical", bounding_box_format="xyxy") + layer = RandomFlip( + "horizontal_and_vertical", bounding_box_format="xyxy" + ) with unittest.mock.patch.object( layer._random_generator, "random_uniform", @@ -199,9 +213,13 @@ def test_augment_boxes_ragged(self): "classes": tf.ragged.constant([[0, 0], [0]], dtype=tf.float32), } - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) expected_output = bounding_box.to_dense(expected_output) - self.assertAllClose(expected_output["boxes"], output["bounding_boxes"]["boxes"]) + self.assertAllClose( + expected_output["boxes"], output["bounding_boxes"]["boxes"] + ) self.assertAllClose( expected_output["classes"], output["bounding_boxes"]["classes"] ) diff --git a/keras_cv/layers/preprocessing/random_gaussian_blur.py b/keras_cv/layers/preprocessing/random_gaussian_blur.py index f3841f988e..1de2505430 100644 --- a/keras_cv/layers/preprocessing/random_gaussian_blur.py +++ b/keras_cv/layers/preprocessing/random_gaussian_blur.py @@ -92,7 +92,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask @staticmethod @@ -100,10 +102,12 @@ def get_kernel(factor, filter_size): # We are running this in float32, regardless of layer's self.compute_dtype. # Calculating blur_filter in lower precision will corrupt the final results. 
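The hunk continuing below only re-wraps the 1-D Gaussian kernel built by `RandomGaussianBlur.get_kernel`. As a standalone, hedged sketch of that same formula (the helper itself is unchanged apart from line breaks):

    import tensorflow as tf

    def gaussian_kernel(factor, filter_size):
        # 1-D Gaussian exp(-x^2 / (2 * sigma^2)), normalized to sum to 1.
        x = tf.cast(
            tf.range(-filter_size // 2 + 1, filter_size // 2 + 1),
            dtype=tf.float32,
        )
        kernel = tf.exp(
            -tf.pow(x, 2.0)
            / (2.0 * tf.pow(tf.cast(factor, dtype=tf.float32), 2.0))
        )
        return kernel / tf.reduce_sum(kernel)

    print(gaussian_kernel(1.0, 3).numpy())  # roughly [0.274, 0.452, 0.274]
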
x = tf.cast( - tf.range(-filter_size // 2 + 1, filter_size // 2 + 1), dtype=tf.float32 + tf.range(-filter_size // 2 + 1, filter_size // 2 + 1), + dtype=tf.float32, ) blur_filter = tf.exp( - -tf.pow(x, 2.0) / (2.0 * tf.pow(tf.cast(factor, dtype=tf.float32), 2.0)) + -tf.pow(x, 2.0) + / (2.0 * tf.pow(tf.cast(factor, dtype=tf.float32), 2.0)) ) blur_filter /= tf.reduce_sum(blur_filter) return blur_filter diff --git a/keras_cv/layers/preprocessing/random_gaussian_blur_test.py b/keras_cv/layers/preprocessing/random_gaussian_blur_test.py index a8417f57c8..0d3c2de71c 100644 --- a/keras_cv/layers/preprocessing/random_gaussian_blur_test.py +++ b/keras_cv/layers/preprocessing/random_gaussian_blur_test.py @@ -19,7 +19,9 @@ class RandomGaussianBlurTest(tf.test.TestCase): def test_return_shapes(self): - layer = preprocessing.RandomGaussianBlur(kernel_size=(3, 7), factor=(0, 2)) + layer = preprocessing.RandomGaussianBlur( + kernel_size=(3, 7), factor=(0, 2) + ) # RGB xs = tf.ones((2, 512, 512, 3)) @@ -32,7 +34,9 @@ def test_return_shapes(self): self.assertEqual(xs.shape, [2, 512, 512, 1]) def test_in_single_image(self): - layer = preprocessing.RandomGaussianBlur(kernel_size=(3, 7), factor=(0, 2)) + layer = preprocessing.RandomGaussianBlur( + kernel_size=(3, 7), factor=(0, 2) + ) # RGB xs = tf.cast( @@ -53,7 +57,9 @@ def test_in_single_image(self): self.assertEqual(xs.shape, [512, 512, 1]) def test_non_square_images(self): - layer = preprocessing.RandomGaussianBlur(kernel_size=(3, 7), factor=(0, 2)) + layer = preprocessing.RandomGaussianBlur( + kernel_size=(3, 7), factor=(0, 2) + ) # RGB xs = tf.ones((2, 256, 512, 3)) @@ -79,7 +85,9 @@ def test_single_input_args(self): self.assertEqual(xs.shape, [2, 512, 512, 1]) def test_numerical(self): - layer = preprocessing.RandomGaussianBlur(kernel_size=3, factor=(1.0, 1.0)) + layer = preprocessing.RandomGaussianBlur( + kernel_size=3, factor=(1.0, 1.0) + ) xs = tf.expand_dims( tf.constant([[0, 0, 0], [0, 1, 0], [0, 0, 0]]), diff --git a/keras_cv/layers/preprocessing/random_hue.py b/keras_cv/layers/preprocessing/random_hue.py index 297d616112..99b5fdaf43 100644 --- a/keras_cv/layers/preprocessing/random_hue.py +++ b/keras_cv/layers/preprocessing/random_hue.py @@ -62,8 +62,12 @@ def __init__(self, factor, value_range, seed=None, **kwargs): self.seed = seed def get_random_transformation_batch(self, batch_size, **kwargs): - invert = self._random_generator.random_uniform((batch_size,), 0, 1, tf.float32) - invert = tf.where(invert > 0.5, -tf.ones_like(invert), tf.ones_like(invert)) + invert = self._random_generator.random_uniform( + (batch_size,), 0, 1, tf.float32 + ) + invert = tf.where( + invert > 0.5, -tf.ones_like(invert), tf.ones_like(invert) + ) # We must scale self.factor() to the range [-0.5, 0.5]. This is because the # tf.image operation performs rotation on the hue saturation value orientation. 
# This can be thought of as an angle in the range [-180, 180] @@ -99,7 +103,9 @@ def augment_images(self, images, transformations, **kwargs): def augment_labels(self, labels, transformations, **kwargs): return labels - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): return segmentation_masks def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): @@ -117,5 +123,7 @@ def get_config(self): @classmethod def from_config(cls, config): if isinstance(config["factor"], dict): - config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + config["factor"] = tf.keras.utils.deserialize_keras_object( + config["factor"] + ) return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_hue_test.py b/keras_cv/layers/preprocessing/random_hue_test.py index 18c1504058..5171417c7a 100644 --- a/keras_cv/layers/preprocessing/random_hue_test.py +++ b/keras_cv/layers/preprocessing/random_hue_test.py @@ -49,10 +49,16 @@ def test_adjust_full_opposite_hue(self): # Make sure the max and min channel are the same between input and output # In the meantime, and channel will swap between each other. self.assertAllClose( - channel_max, tf.math.reduce_max(image, axis=-1), atol=1e-5, rtol=1e-5 + channel_max, + tf.math.reduce_max(image, axis=-1), + atol=1e-5, + rtol=1e-5, ) self.assertAllClose( - channel_min, tf.math.reduce_min(image, axis=-1), atol=1e-5, rtol=1e-5 + channel_min, + tf.math.reduce_min(image, axis=-1), + atol=1e-5, + rtol=1e-5, ) @parameterized.named_parameters( @@ -63,7 +69,9 @@ def test_adjusts_all_values_for_factor(self, factor): # Value range (0, 100) image = tf.random.uniform(shape=image_shape) * 100.0 - layer = preprocessing.RandomHue(factor=(factor, factor), value_range=(0, 255)) + layer = preprocessing.RandomHue( + factor=(factor, factor), value_range=(0, 255) + ) output = layer(image) self.assertNotAllClose(image, output, atol=1e-5, rtol=1e-5) @@ -82,7 +90,9 @@ def test_adjustment_for_non_rgb_value_range(self): def test_with_uint8(self): image_shape = (4, 8, 8, 3) - image = tf.cast(tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8) + image = tf.cast( + tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8 + ) layer = preprocessing.RandomHue(factor=(0.0, 0.0), value_range=(0, 255)) output = layer(image) diff --git a/keras_cv/layers/preprocessing/random_jpeg_quality.py b/keras_cv/layers/preprocessing/random_jpeg_quality.py index 1dd81c7a16..b9b888963a 100644 --- a/keras_cv/layers/preprocessing/random_jpeg_quality.py +++ b/keras_cv/layers/preprocessing/random_jpeg_quality.py @@ -51,7 +51,11 @@ def __init__(self, factor, seed=None, **kwargs): ) self.seed = seed self.factor = preprocessing.parse_factor( - factor, min_value=0, max_value=100, param_name="factor", seed=self.seed + factor, + min_value=0, + max_value=100, + param_name="factor", + seed=self.seed, ) def get_random_transformation(self, **kwargs): @@ -67,7 +71,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): diff --git a/keras_cv/layers/preprocessing/random_rotation.py b/keras_cv/layers/preprocessing/random_rotation.py index a785dbd15d..155cff3497 
100644 --- a/keras_cv/layers/preprocessing/random_rotation.py +++ b/keras_cv/layers/preprocessing/random_rotation.py @@ -107,7 +107,9 @@ def __init__( raise ValueError( "Factor cannot have negative values, " "got {}".format(factor) ) - preprocessing.check_fill_mode_and_interpolation(fill_mode, interpolation) + preprocessing.check_fill_mode_and_interpolation( + fill_mode, interpolation + ) self.fill_mode = fill_mode self.fill_value = fill_value self.interpolation = interpolation @@ -238,16 +240,21 @@ def augment_bounding_boxes( def augment_label(self, label, transformation, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): # If segmentation_classes is specified, we have a dense segmentation mask. # We therefore one-hot encode before rotation to avoid bad interpolation # during the rotation transformation. We then make the mask sparse # again using tf.argmax. if self.segmentation_classes: one_hot_mask = tf.one_hot( - tf.squeeze(segmentation_mask, axis=-1), self.segmentation_classes + tf.squeeze(segmentation_mask, axis=-1), + self.segmentation_classes, + ) + rotated_one_hot_mask = self._rotate_image( + one_hot_mask, transformation ) - rotated_one_hot_mask = self._rotate_image(one_hot_mask, transformation) rotated_mask = tf.argmax(rotated_one_hot_mask, axis=-1) return tf.expand_dims(rotated_mask, axis=-1) else: diff --git a/keras_cv/layers/preprocessing/random_rotation_test.py b/keras_cv/layers/preprocessing/random_rotation_test.py index 97b490d8a3..840457c928 100644 --- a/keras_cv/layers/preprocessing/random_rotation_test.py +++ b/keras_cv/layers/preprocessing/random_rotation_test.py @@ -68,7 +68,9 @@ def test_augment_bounding_boxes(self): # 180 rotation. layer = RandomRotation(factor=(0.5, 0.5), bounding_box_format="xyxy") output = layer(input) - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) expected_bounding_boxes = { "boxes": tf.convert_to_tensor( [[112.0, 112.0, 312.0, 312.0], [212.0, 212.0, 412.0, 412.0]], @@ -112,7 +114,10 @@ def test_ragged_bounding_boxes(self): expected_output = { "boxes": tf.ragged.constant( [ - [[112.0, 112.0, 312.0, 312.0], [212.0, 212.0, 412.0, 412.0]], + [ + [112.0, 112.0, 312.0, 312.0], + [212.0, 212.0, 412.0, 412.0], + ], [[112.0, 112.0, 312.0, 312.0]], ], dtype=tf.float32, @@ -129,9 +134,13 @@ def test_ragged_bounding_boxes(self): ), } expected_output = bounding_box.to_dense(expected_output) - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) - self.assertAllClose(expected_output["boxes"], output["bounding_boxes"]["boxes"]) + self.assertAllClose( + expected_output["boxes"], output["bounding_boxes"]["boxes"] + ) self.assertAllClose( expected_output["classes"], output["bounding_boxes"]["classes"] ) @@ -151,13 +160,17 @@ def test_augment_sparse_segmentation_mask(self): outputs = bad_layer(inputs) # 90 degree rotation. - layer = RandomRotation(factor=(0.25, 0.25), segmentation_classes=classes) + layer = RandomRotation( + factor=(0.25, 0.25), segmentation_classes=classes + ) outputs = layer(inputs) expected_masks = np.rot90(masks, axes=(1, 2)) self.assertAllClose(expected_masks, outputs["segmentation_masks"]) # 45 degree rotation. Only verifies that no interpolation takes place. 
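The `augment_segmentation_mask` hunk above reformats the path that one-hot encodes a dense integer mask before the rotation and takes an argmax afterwards, so interpolation never blends class ids. A hedged sketch of that pattern, using a horizontal flip as a stand-in for the actual rotation:

    import tensorflow as tf

    classes = 8
    masks = tf.random.uniform((2, 20, 20, 1), maxval=classes, dtype=tf.int32)

    # Dense ids -> one-hot channels, apply the spatial op, then argmax back to ids.
    one_hot = tf.one_hot(tf.squeeze(masks, axis=-1), classes)         # (2, 20, 20, 8)
    transformed = tf.image.flip_left_right(one_hot)                   # stand-in for rotate
    dense = tf.expand_dims(tf.argmax(transformed, axis=-1), axis=-1)  # (2, 20, 20, 1)
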
- layer = RandomRotation(factor=(0.125, 0.125), segmentation_classes=classes) + layer = RandomRotation( + factor=(0.125, 0.125), segmentation_classes=classes + ) outputs = layer(inputs) self.assertAllInSet(outputs["segmentation_masks"], [0, 7]) @@ -165,7 +178,9 @@ def test_augment_one_hot_segmentation_mask(self): classes = 8 input_images = np.random.random((2, 20, 20, 3)).astype(np.float32) - masks = tf.one_hot(np.random.randint(classes, size=(2, 20, 20)), classes) + masks = tf.one_hot( + np.random.randint(classes, size=(2, 20, 20)), classes + ) inputs = {"images": input_images, "segmentation_masks": masks} # 90 rotation. diff --git a/keras_cv/layers/preprocessing/random_saturation.py b/keras_cv/layers/preprocessing/random_saturation.py index a12d27cae1..c302147f4b 100644 --- a/keras_cv/layers/preprocessing/random_saturation.py +++ b/keras_cv/layers/preprocessing/random_saturation.py @@ -88,18 +88,24 @@ def augment_images(self, images, transformations, **kwargs): s_channel = tf.multiply( images[..., 1], adjust_factors[..., tf.newaxis, tf.newaxis] ) - s_channel = tf.clip_by_value(s_channel, clip_value_min=0.0, clip_value_max=1.0) + s_channel = tf.clip_by_value( + s_channel, clip_value_min=0.0, clip_value_max=1.0 + ) images = tf.stack([images[..., 0], s_channel, images[..., 2]], axis=-1) images = tf.image.hsv_to_rgb(images) return images - def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + def augment_bounding_boxes( + self, bounding_boxes, transformation=None, **kwargs + ): return bounding_boxes def augment_labels(self, labels, transformations=None, **kwargs): return labels - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): return segmentation_masks def get_config(self): @@ -113,5 +119,7 @@ def get_config(self): @classmethod def from_config(cls, config): if isinstance(config["factor"], dict): - config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + config["factor"] = tf.keras.utils.deserialize_keras_object( + config["factor"] + ) return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_saturation_test.py b/keras_cv/layers/preprocessing/random_saturation_test.py index c50e9c470d..12b01943f2 100644 --- a/keras_cv/layers/preprocessing/random_saturation_test.py +++ b/keras_cv/layers/preprocessing/random_saturation_test.py @@ -70,15 +70,21 @@ def augment_image(self, image, transformation=None, **kwargs): # it will be handled correctly when it is a one tensor. 
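The RandomSaturation hunks above re-wrap the HSV-space adjustment: scale the S channel, clip it to [0, 1], and convert back to RGB. A standalone, hedged sketch of the same operation on float RGB images in [0, 1]:

    import tensorflow as tf

    def adjust_saturation(images, factor):
        # factor > 1 increases saturation; factor < 1 moves towards grayscale.
        hsv = tf.image.rgb_to_hsv(images)
        s = tf.clip_by_value(hsv[..., 1] * factor, 0.0, 1.0)
        hsv = tf.stack([hsv[..., 0], s, hsv[..., 2]], axis=-1)
        return tf.image.hsv_to_rgb(hsv)

    out = adjust_saturation(tf.random.uniform((2, 8, 8, 3)), 1.5)
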
transformation = tf.convert_to_tensor(transformation) adjust_factor = transformation / (1 - transformation) - return tf.image.adjust_saturation(image, saturation_factor=adjust_factor) + return tf.image.adjust_saturation( + image, saturation_factor=adjust_factor + ) - def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + def augment_bounding_boxes( + self, bounding_boxes, transformation=None, **kwargs + ): return bounding_boxes def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): @@ -92,7 +98,9 @@ def get_config(self): @classmethod def from_config(cls, config): if isinstance(config["factor"], dict): - config["factor"] = tf.keras.utils.deserialize_keras_object(config["factor"]) + config["factor"] = tf.keras.utils.deserialize_keras_object( + config["factor"] + ) return cls(**config) @@ -128,7 +136,9 @@ def test_adjust_to_grayscale(self): # Make sure all the pixel has the same value among the channel dim, which is # a fully gray RGB. for channel_value in channel_values: - self.assertAllClose(channel_mean, channel_value, atol=1e-5, rtol=1e-5) + self.assertAllClose( + channel_mean, channel_value, atol=1e-5, rtol=1e-5 + ) def test_adjust_to_full_saturation(self): image_shape = (4, 8, 8, 3) @@ -156,7 +166,9 @@ def test_adjustment_for_non_rgb_value_range(self): def test_with_unit8(self): image_shape = (4, 8, 8, 3) - image = tf.cast(tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8) + image = tf.cast( + tf.random.uniform(shape=image_shape) * 255.0, dtype=tf.uint8 + ) layer = preprocessing.RandomSaturation(factor=(0.5, 0.5)) output = layer(image) diff --git a/keras_cv/layers/preprocessing/random_sharpness.py b/keras_cv/layers/preprocessing/random_sharpness.py index 6fc61eea98..8b44aa5ebb 100644 --- a/keras_cv/layers/preprocessing/random_sharpness.py +++ b/keras_cv/layers/preprocessing/random_sharpness.py @@ -125,12 +125,18 @@ def augment_bounding_boxes(self, bounding_boxes, transformation, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): config = super().get_config() config.update( - {"factor": self.factor, "value_range": self.value_range, "seed": self.seed} + { + "factor": self.factor, + "value_range": self.value_range, + "seed": self.seed, + } ) return config diff --git a/keras_cv/layers/preprocessing/random_shear.py b/keras_cv/layers/preprocessing/random_shear.py index fd85a88e75..da85c28d32 100644 --- a/keras_cv/layers/preprocessing/random_shear.py +++ b/keras_cv/layers/preprocessing/random_shear.py @@ -174,13 +174,17 @@ def augment_bounding_boxes( bounding_boxes["boxes"] ) if x is not None: - extended_boxes = self._apply_horizontal_transformation_to_bounding_box( - extended_boxes, x + extended_boxes = ( + self._apply_horizontal_transformation_to_bounding_box( + extended_boxes, x + ) ) # apply vertical shear if y is not None: - extended_boxes = self._apply_vertical_transformation_to_bounding_box( - extended_boxes, y + extended_boxes = ( + self._apply_vertical_transformation_to_bounding_box( + extended_boxes, y + ) ) boxes = 
self._convert_to_four_coordinate(extended_boxes, x, y) @@ -275,7 +279,9 @@ def negative_case_y(): ) @staticmethod - def _apply_horizontal_transformation_to_bounding_box(extended_bounding_boxes, x): + def _apply_horizontal_transformation_to_bounding_box( + extended_bounding_boxes, x + ): # create transformation matrix [1,4] matrix = tf.stack([1.0, -x, 0, 1.0], axis=0) # reshape it to [2,2] @@ -289,7 +295,9 @@ def _apply_horizontal_transformation_to_bounding_box(extended_bounding_boxes, x) return transformed_bboxes @staticmethod - def _apply_vertical_transformation_to_bounding_box(extended_bounding_boxes, y): + def _apply_vertical_transformation_to_bounding_box( + extended_bounding_boxes, y + ): # create transformation matrix [1,4] matrix = tf.stack([1.0, 0, -y, 1.0], axis=0) # reshape it to [2,2] diff --git a/keras_cv/layers/preprocessing/random_shear_test.py b/keras_cv/layers/preprocessing/random_shear_test.py index 02c9e34569..4d3179e566 100644 --- a/keras_cv/layers/preprocessing/random_shear_test.py +++ b/keras_cv/layers/preprocessing/random_shear_test.py @@ -63,7 +63,11 @@ def test_return_shapes(self): ) outputs = layer( - {"images": xs, "targets": ys_labels, "bounding_boxes": ys_bounding_boxes} + { + "images": xs, + "targets": ys_labels, + "bounding_boxes": ys_bounding_boxes, + } ) xs, ys_labels, ys_bounding_boxes = ( outputs["images"], @@ -109,8 +113,16 @@ def test_area(self): outputs["bounding_boxes"]["boxes"], ) new_area = tf.math.multiply( - tf.abs(tf.subtract(ys_bounding_boxes[..., 2], ys_bounding_boxes[..., 0])), - tf.abs(tf.subtract(ys_bounding_boxes[..., 3], ys_bounding_boxes[..., 1])), + tf.abs( + tf.subtract( + ys_bounding_boxes[..., 2], ys_bounding_boxes[..., 0] + ) + ), + tf.abs( + tf.subtract( + ys_bounding_boxes[..., 3], ys_bounding_boxes[..., 1] + ) + ), ) old_area = tf.math.multiply( tf.abs(tf.subtract(ys["boxes"][..., 2], ys["boxes"][..., 0])), @@ -189,8 +201,12 @@ def DISABLED_test_output_values(self): ys = tf.cast( tf.stack( [ - tf.constant([[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]]), - tf.constant([[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]]), + tf.constant( + [[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]] + ), + tf.constant( + [[10.0, 20.0, 40.0, 50.0], [12.0, 22.0, 42.0, 54.0]] + ), ], axis=0, ), diff --git a/keras_cv/layers/preprocessing/random_translation_test.py b/keras_cv/layers/preprocessing/random_translation_test.py index df0c65f395..1c0afa6487 100644 --- a/keras_cv/layers/preprocessing/random_translation_test.py +++ b/keras_cv/layers/preprocessing/random_translation_test.py @@ -22,7 +22,9 @@ class RandomTranslationTest(tf.test.TestCase, parameterized.TestCase): def test_random_translation_up_numeric_reflect(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) # Shifting by -.2 * 5 = 1 pixel. layer = preprocessing.RandomTranslation( height_factor=(-0.2, -0.2), width_factor=0.0 @@ -42,7 +44,9 @@ def test_random_translation_up_numeric_reflect(self): def test_random_translation_up_numeric_constant(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) # Shifting by -.2 * 5 = 1 pixel. 
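The RandomShear hunks above re-wrap the helpers that apply a 2x2 shear matrix to extended bounding-box coordinates. A hedged illustration of a horizontal shear applied to (x, y) corner points; the sign and ordering conventions in the layer's own helper may differ:

    import tensorflow as tf

    # Horizontal shear as [[1, -x], [0, 1]] applied to (x, y) points.
    x_shear = 0.3
    matrix = tf.reshape(tf.stack([1.0, -x_shear, 0.0, 1.0]), (2, 2))
    corners = tf.constant([[10.0, 20.0], [40.0, 50.0]])  # two (x, y) points
    sheared = tf.transpose(tf.matmul(matrix, corners, transpose_b=True))
    print(sheared.numpy())  # [[ 4. 20.] [25. 50.]]
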
layer = preprocessing.RandomTranslation( height_factor=(-0.2, -0.2), @@ -64,7 +68,9 @@ def test_random_translation_up_numeric_constant(self): def test_random_translation_down_numeric_reflect(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) # Shifting by .2 * 5 = 1 pixel. layer = preprocessing.RandomTranslation( height_factor=(0.2, 0.2), width_factor=0.0 @@ -84,7 +90,9 @@ def test_random_translation_down_numeric_reflect(self): def test_random_translation_asymmetric_size_numeric_reflect(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 16), (1, 8, 2, 1)).astype(dtype) + input_image = np.reshape(np.arange(0, 16), (1, 8, 2, 1)).astype( + dtype + ) # Shifting by .5 * 8 = 1 pixel. layer = preprocessing.RandomTranslation( height_factor=(0.5, 0.5), width_factor=0.0 @@ -109,7 +117,9 @@ def test_random_translation_asymmetric_size_numeric_reflect(self): def test_random_translation_down_numeric_constant(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) # Shifting by -.2 * 5 = 1 pixel. layer = preprocessing.RandomTranslation( height_factor=(0.2, 0.2), @@ -131,7 +141,9 @@ def test_random_translation_down_numeric_constant(self): def test_random_translation_left_numeric_reflect(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) # Shifting by .2 * 5 = 1 pixel. layer = preprocessing.RandomTranslation( height_factor=0.0, width_factor=(-0.2, -0.2) @@ -151,7 +163,9 @@ def test_random_translation_left_numeric_reflect(self): def test_random_translation_left_numeric_constant(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(dtype) + input_image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype( + dtype + ) # Shifting by -.2 * 5 = 1 pixel. 
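These RandomTranslation test hunks keep asserting the same numeric behaviour: a height_factor of (-0.2, -0.2) on a 5-pixel image shifts the content by exactly 0.2 * 5 = 1 pixel. A hedged sketch of that check, using the stock `tf.keras.layers.RandomTranslation` as a stand-in for the KerasCV layer:

    import numpy as np
    import tensorflow as tf

    image = np.reshape(np.arange(0, 25), (1, 5, 5, 1)).astype(np.float32)
    layer = tf.keras.layers.RandomTranslation(
        height_factor=(-0.2, -0.2), width_factor=0.0, fill_mode="reflect"
    )
    shifted = layer(image, training=True)  # rows move up by one pixel
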
layer = preprocessing.RandomTranslation( height_factor=0.0, diff --git a/keras_cv/layers/preprocessing/random_zoom.py b/keras_cv/layers/preprocessing/random_zoom.py index 8b855e170a..649310e79a 100644 --- a/keras_cv/layers/preprocessing/random_zoom.py +++ b/keras_cv/layers/preprocessing/random_zoom.py @@ -129,7 +129,9 @@ def __init__( f"got {width_factor}" ) - preprocessing.check_fill_mode_and_interpolation(fill_mode, interpolation) + preprocessing.check_fill_mode_and_interpolation( + fill_mode, interpolation + ) self.fill_mode = fill_mode self.fill_value = fill_value @@ -162,7 +164,9 @@ def augment_image(self, image, transformation, **kwargs): img_wd = tf.cast(image_shape[W_AXIS], tf.float32) width_zoom = transformation["width_zoom"] height_zoom = transformation["height_zoom"] - zooms = tf.cast(tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32) + zooms = tf.cast( + tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32 + ) output = preprocessing.transform( image, self.get_zoom_matrix(zooms, img_hd, img_wd), diff --git a/keras_cv/layers/preprocessing/random_zoom_test.py b/keras_cv/layers/preprocessing/random_zoom_test.py index 52154f8114..f3f5b0cfd0 100644 --- a/keras_cv/layers/preprocessing/random_zoom_test.py +++ b/keras_cv/layers/preprocessing/random_zoom_test.py @@ -52,7 +52,9 @@ def test_output_shapes(self, height_factor, width_factor): def test_random_zoom_in_numeric(self): for dtype in (np.int64, np.float32): input_image = np.reshape(np.arange(0, 25), (5, 5, 1)).astype(dtype) - layer = RandomZoom((-0.5, -0.5), (-0.5, -0.5), interpolation="nearest") + layer = RandomZoom( + (-0.5, -0.5), (-0.5, -0.5), interpolation="nearest" + ) output_image = layer(np.expand_dims(input_image, axis=0)) expected_output = np.asarray( [ diff --git a/keras_cv/layers/preprocessing/randomly_zoomed_crop.py b/keras_cv/layers/preprocessing/randomly_zoomed_crop.py index b3a034bdef..d59d2f6617 100644 --- a/keras_cv/layers/preprocessing/randomly_zoomed_crop.py +++ b/keras_cv/layers/preprocessing/randomly_zoomed_crop.py @@ -78,7 +78,9 @@ def __init__( seed=seed, ) - self._check_class_arguments(height, width, zoom_factor, aspect_ratio_factor) + self._check_class_arguments( + height, width, zoom_factor, aspect_ratio_factor + ) self.force_output_dense_images = True self.interpolation = interpolation self.seed = seed @@ -180,12 +182,18 @@ def _resize(self, image): # smart_resize will always output float32, so we need to re-cast. return tf.cast(outputs, self.compute_dtype) - def _check_class_arguments(self, height, width, zoom_factor, aspect_ratio_factor): + def _check_class_arguments( + self, height, width, zoom_factor, aspect_ratio_factor + ): if not isinstance(height, int): - raise ValueError("`height` must be an integer. Received height={height}") + raise ValueError( + "`height` must be an integer. Received height={height}" + ) if not isinstance(width, int): - raise ValueError("`width` must be an integer. Received width={width}") + raise ValueError( + "`width` must be an integer. 
Received width={width}" + ) if ( not isinstance(zoom_factor, (tuple, list, core.FactorSampler)) @@ -199,7 +207,9 @@ def _check_class_arguments(self, height, width, zoom_factor, aspect_ratio_factor ) if ( - not isinstance(aspect_ratio_factor, (tuple, list, core.FactorSampler)) + not isinstance( + aspect_ratio_factor, (tuple, list, core.FactorSampler) + ) or isinstance(aspect_ratio_factor, float) or isinstance(aspect_ratio_factor, int) ): @@ -233,7 +243,9 @@ def from_config(cls, config): config["zoom_factor"] ) if isinstance(config["aspect_ratio_factor"], dict): - config["aspect_ratio_factor"] = tf.keras.utils.deserialize_keras_object( + config[ + "aspect_ratio_factor" + ] = tf.keras.utils.deserialize_keras_object( config["aspect_ratio_factor"] ) return cls(**config) diff --git a/keras_cv/layers/preprocessing/repeated_augmentation.py b/keras_cv/layers/preprocessing/repeated_augmentation.py index aa7bdc8e76..aa44ca361d 100644 --- a/keras_cv/layers/preprocessing/repeated_augmentation.py +++ b/keras_cv/layers/preprocessing/repeated_augmentation.py @@ -87,14 +87,18 @@ def _batch_augment(self, inputs): outputs = {} for k in inputs.keys(): - outputs[k] = tf.concat([output[k] for output in augmenter_outputs], axis=0) + outputs[k] = tf.concat( + [output[k] for output in augmenter_outputs], axis=0 + ) if not self.shuffle: return outputs return self.shuffle_outputs(outputs) def shuffle_outputs(self, result): - indices = tf.range(start=0, limit=tf.shape(result["images"])[0], dtype=tf.int32) + indices = tf.range( + start=0, limit=tf.shape(result["images"])[0], dtype=tf.int32 + ) indices = tf.random.shuffle(indices) for key in result: result[key] = tf.gather(result[key], indices) diff --git a/keras_cv/layers/preprocessing/rescaling.py b/keras_cv/layers/preprocessing/rescaling.py index 8c6bfaa66c..ed33e37e25 100644 --- a/keras_cv/layers/preprocessing/rescaling.py +++ b/keras_cv/layers/preprocessing/rescaling.py @@ -68,10 +68,14 @@ def augment_image(self, image, transformation, **kwargs): def augment_label(self, label, transformation, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask - def augment_bounding_boxes(self, bounding_boxes, transformation=None, **kwargs): + def augment_bounding_boxes( + self, bounding_boxes, transformation=None, **kwargs + ): return bounding_boxes def get_config(self): diff --git a/keras_cv/layers/preprocessing/resizing.py b/keras_cv/layers/preprocessing/resizing.py index 859debdf3c..d7fbfd18a4 100644 --- a/keras_cv/layers/preprocessing/resizing.py +++ b/keras_cv/layers/preprocessing/resizing.py @@ -80,7 +80,9 @@ def __init__( self.interpolation = interpolation self.crop_to_aspect_ratio = crop_to_aspect_ratio self.pad_to_aspect_ratio = pad_to_aspect_ratio - self._interpolation_method = keras_cv.utils.get_interpolation(interpolation) + self._interpolation_method = keras_cv.utils.get_interpolation( + interpolation + ) self.bounding_box_format = bounding_box_format self.force_output_dense_images = True @@ -118,7 +120,9 @@ def _augment(self, inputs): bounding_boxes["classes"] = tf.expand_dims( bounding_boxes["classes"], axis=0 ) - bounding_boxes["boxes"] = tf.expand_dims(bounding_boxes["boxes"], axis=0) + bounding_boxes["boxes"] = tf.expand_dims( + bounding_boxes["boxes"], axis=0 + ) inputs["bounding_boxes"] = bounding_boxes outputs = self._batch_augment(inputs) @@ -142,7 +146,9 @@ def 
_resize_with_distortion(self, inputs): images = inputs.get("images", None) size = [self.height, self.width] - images = tf.image.resize(images, size=size, method=self._interpolation_method) + images = tf.image.resize( + images, size=size, method=self._interpolation_method + ) images = tf.cast(images, self.compute_dtype) inputs["images"] = images @@ -189,7 +195,9 @@ def resize_single_with_pad_to_aspect(x): source="rel_xyxy", target="xyxy", ) - image = tf.image.pad_to_bounding_box(image, 0, 0, self.height, self.width) + image = tf.image.pad_to_bounding_box( + image, 0, 0, self.height, self.width + ) if bounding_boxes is not None: bounding_boxes = keras_cv.bounding_box.clip_to_image( bounding_boxes, images=image, bounding_box_format="xyxy" diff --git a/keras_cv/layers/preprocessing/resizing_test.py b/keras_cv/layers/preprocessing/resizing_test.py index 0f331833f8..9656b59d4e 100644 --- a/keras_cv/layers/preprocessing/resizing_test.py +++ b/keras_cv/layers/preprocessing/resizing_test.py @@ -69,8 +69,12 @@ def test_up_sampling(self, kwargs, expected_height, expected_width): def test_down_sampling_numeric(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype(dtype) - layer = cv_layers.Resizing(height=2, width=2, interpolation="nearest") + input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype( + dtype + ) + layer = cv_layers.Resizing( + height=2, width=2, interpolation="nearest" + ) output_image = layer(input_image) # pyformat: disable expected_output = np.asarray([[5, 7], [13, 15]]).astype(dtype) @@ -80,8 +84,12 @@ def test_down_sampling_numeric(self): def test_up_sampling_numeric(self): for dtype in (np.int64, np.float32): - input_image = np.reshape(np.arange(0, 4), (1, 2, 2, 1)).astype(dtype) - layer = cv_layers.Resizing(height=4, width=4, interpolation="nearest") + input_image = np.reshape(np.arange(0, 4), (1, 2, 2, 1)).astype( + dtype + ) + layer = cv_layers.Resizing( + height=4, width=4, interpolation="nearest" + ) output_image = layer(input_image) # pyformat: disable expected_output = np.asarray( @@ -108,7 +116,9 @@ def test_config_with_custom_name(self): self.assertEqual(layer_1.name, layer.name) def test_crop_to_aspect_ratio(self): - input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype("float32") + input_image = np.reshape(np.arange(0, 16), (1, 4, 4, 1)).astype( + "float32" + ) layer = cv_layers.Resizing(4, 2, crop_to_aspect_ratio=True) output_image = layer(input_image) expected_output = np.asarray( @@ -169,7 +179,9 @@ def test_ragged_image(self, crop_to_aspect_ratio): def test_raises_with_segmap(self): inputs = { "images": np.array([[[1], [2]], [[3], [4]]], dtype="float64"), - "segmentation_map": np.array([[[1], [2]], [[3], [4]]], dtype="float64"), + "segmentation_map": np.array( + [[[1], [2]], [[3], [4]]], dtype="float64" + ), } layer = cv_layers.Resizing(2, 2) with self.assertRaises(ValueError): @@ -302,4 +314,6 @@ def test_pad_to_size_with_bounding_boxes_ragged_images_upsample(self): ) self.assertAllEqual(outputs["images"][1][:, :8, :], tf.ones((16, 8, 3))) - self.assertAllEqual(outputs["images"][1][:, -8:, :], tf.zeros((16, 8, 3))) + self.assertAllEqual( + outputs["images"][1][:, -8:, :], tf.zeros((16, 8, 3)) + ) diff --git a/keras_cv/layers/preprocessing/solarization.py b/keras_cv/layers/preprocessing/solarization.py index 7272574e7e..fe1ea5258e 100644 --- a/keras_cv/layers/preprocessing/solarization.py +++ b/keras_cv/layers/preprocessing/solarization.py @@ -80,10 +80,16 @@ def __init__( 
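In the pad-to-aspect-ratio path above, the resized image is placed at offset (0, 0) and padded out to the target size with tf.image.pad_to_bounding_box, which is why the ragged-upsample test expects the original ones on one side and zero padding on the other. A minimal sketch of that padding call (illustrative shapes, not part of this patch):

import tensorflow as tf

image = tf.ones((16, 8, 3))                                  # resized content
padded = tf.image.pad_to_bounding_box(image, 0, 0, 16, 16)   # pad out to 16x16
# The original pixels stay in the left columns; the added columns are zeros.
print(padded.shape)  # (16, 16, 3)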
super().__init__(seed=seed, **kwargs) self.seed = seed self.addition_factor = preprocessing.parse_factor( - addition_factor, max_value=255, seed=seed, param_name="addition_factor" + addition_factor, + max_value=255, + seed=seed, + param_name="addition_factor", ) self.threshold_factor = preprocessing.parse_factor( - threshold_factor, max_value=255, seed=seed, param_name="threshold_factor" + threshold_factor, + max_value=255, + seed=seed, + param_name="threshold_factor", ) self.value_range = value_range @@ -118,7 +124,9 @@ def augment_bounding_boxes(self, bounding_boxes, **kwargs): def augment_label(self, label, transformation=None, **kwargs): return label - def augment_segmentation_mask(self, segmentation_mask, transformation, **kwargs): + def augment_segmentation_mask( + self, segmentation_mask, transformation, **kwargs + ): return segmentation_mask def get_config(self): @@ -134,7 +142,9 @@ def get_config(self): @classmethod def from_config(cls, config): if isinstance(config["threshold_factor"], dict): - config["threshold_factor"] = tf.keras.utils.deserialize_keras_object( + config[ + "threshold_factor" + ] = tf.keras.utils.deserialize_keras_object( config["threshold_factor"] ) if isinstance(config["addition_factor"], dict): diff --git a/keras_cv/layers/preprocessing/solarization_test.py b/keras_cv/layers/preprocessing/solarization_test.py index 166cf6654f..e42224cc76 100644 --- a/keras_cv/layers/preprocessing/solarization_test.py +++ b/keras_cv/layers/preprocessing/solarization_test.py @@ -40,7 +40,9 @@ def test_output_values(self, input_value, expected_value): ("255_0", 255, 0), ) def test_solarization_with_addition(self, input_value, output_value): - solarization = Solarization(addition_factor=(10.0, 10.0), value_range=(0, 255)) + solarization = Solarization( + addition_factor=(10.0, 10.0), value_range=(0, 255) + ) self._test_input_output( layer=solarization, input_value=input_value, @@ -55,8 +57,12 @@ def test_solarization_with_addition(self, input_value, output_value): ("191_64", 191, 64), ("255_0", 255, 0), ) - def test_only_values_above_threshold_are_solarized(self, input_value, output_value): - solarization = Solarization(threshold_factor=(128, 128), value_range=(0, 255)) + def test_only_values_above_threshold_are_solarized( + self, input_value, output_value + ): + solarization = Solarization( + threshold_factor=(128, 128), value_range=(0, 255) + ) self._test_input_output( layer=solarization, diff --git a/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py index 5754b280dc..20622ba590 100644 --- a/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py +++ b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer.py @@ -195,7 +195,9 @@ def augment_keypoints(self, keypoints, transformations, **kwargs): """ raise NotImplementedError() - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): """Augment a batch of images' segmentation masks during training. 
Args: @@ -279,7 +281,9 @@ def _batch_augment(self, inputs): if isinstance(images, tf.RaggedTensor): inputs_for_raggeds = {"transformations": transformations, **inputs} print("inputs_for_raggeds", inputs_for_raggeds) - print("self._unwrap_ragged_image_call", self._unwrap_ragged_image_call) + print( + "self._unwrap_ragged_image_call", self._unwrap_ragged_image_call + ) images = tf.map_fn( self._unwrap_ragged_image_call, inputs_for_raggeds, @@ -341,7 +345,9 @@ def call(self, inputs, training=True): inputs, metadata = self._format_inputs(inputs) images = inputs[IMAGES] if images.shape.rank == 3 or images.shape.rank == 4: - return self._format_output(self._batch_augment(inputs), metadata) + return self._format_output( + self._batch_augment(inputs), metadata + ) else: raise ValueError( "Image augmentation layers are expecting inputs to be " @@ -369,7 +375,9 @@ def _format_inputs(self, inputs): ) if BOUNDING_BOXES in inputs: - inputs[BOUNDING_BOXES] = self._format_bounding_boxes(inputs[BOUNDING_BOXES]) + inputs[BOUNDING_BOXES] = self._format_bounding_boxes( + inputs[BOUNDING_BOXES] + ) if isinstance(inputs, dict) and TARGETS in inputs: # TODO(scottzhu): Check if it only contains the valid keys diff --git a/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py index 6f473cb8b7..a9b90eb9f4 100644 --- a/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py +++ b/keras_cv/layers/preprocessing/vectorized_base_image_augmentation_layer_test.py @@ -51,7 +51,9 @@ def augment_bounding_boxes(self, bounding_boxes, transformations, **kwargs): def augment_keypoints(self, keypoints, transformations, **kwargs): return keypoints + transformations[:, None, None] - def augment_segmentation_masks(self, segmentation_masks, transformations, **kwargs): + def augment_segmentation_masks( + self, segmentation_masks, transformations, **kwargs + ): return segmentation_masks + transformations[:, None, None, None] @@ -75,7 +77,9 @@ def test_augment_casts_dtypes(self): images = tf.ones((2, 8, 8, 3), dtype="uint8") output = add_layer(images) - self.assertAllClose(tf.ones((2, 8, 8, 3), dtype="float32") * 3.0, output) + self.assertAllClose( + tf.ones((2, 8, 8, 3), dtype="float32") * 3.0, output + ) def test_augment_batch_images(self): add_layer = VectorizedRandomAddLayer() @@ -142,7 +146,9 @@ def test_augment_image_and_localization_data(self): "classes": np.random.random(size=(8, 3)).astype("float32"), } keypoints = np.random.random(size=(8, 5, 2)).astype("float32") - segmentation_mask = np.random.random(size=(8, 8, 8, 1)).astype("float32") + segmentation_mask = np.random.random(size=(8, 8, 8, 1)).astype( + "float32" + ) output = add_layer( { @@ -164,7 +170,9 @@ def test_augment_image_and_localization_data(self): "segmentation_masks": segmentation_mask + 2.0, } - output["bounding_boxes"] = bounding_box.to_dense(output["bounding_boxes"]) + output["bounding_boxes"] = bounding_box.to_dense( + output["bounding_boxes"] + ) self.assertAllClose(output["images"], expected_output["images"]) self.assertAllClose(output["keypoints"], expected_output["keypoints"]) @@ -188,7 +196,9 @@ def test_augment_batch_image_and_localization_data(self): "classes": np.random.random(size=(2, 3)).astype("float32"), } keypoints = np.random.random(size=(2, 5, 2)).astype("float32") - segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32") + segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype( + 
"float32" + ) output = add_layer( { @@ -203,10 +213,14 @@ def test_augment_batch_image_and_localization_data(self): output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] ) keypoints_diff = output["keypoints"] - keypoints - segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + segmentation_mask_diff = ( + output["segmentation_masks"] - segmentation_masks + ) self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1]) self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) - self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) + self.assertNotAllClose( + segmentation_mask_diff[0], segmentation_mask_diff[1] + ) @tf.function def in_tf_function(inputs): @@ -225,10 +239,14 @@ def in_tf_function(inputs): output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] ) keypoints_diff = output["keypoints"] - keypoints - segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + segmentation_mask_diff = ( + output["segmentation_masks"] - segmentation_masks + ) self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1]) self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) - self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) + self.assertNotAllClose( + segmentation_mask_diff[0], segmentation_mask_diff[1] + ) def test_augment_all_data_in_tf_function(self): add_layer = VectorizedRandomAddLayer() @@ -238,7 +256,9 @@ def test_augment_all_data_in_tf_function(self): "classes": np.random.random(size=(2, 3)).astype("float32"), } keypoints = np.random.random(size=(2, 5, 2)).astype("float32") - segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype("float32") + segmentation_masks = np.random.random(size=(2, 8, 8, 1)).astype( + "float32" + ) @tf.function def in_tf_function(inputs): @@ -257,7 +277,11 @@ def in_tf_function(inputs): output["bounding_boxes"]["boxes"] - bounding_boxes["boxes"] ) keypoints_diff = output["keypoints"] - keypoints - segmentation_mask_diff = output["segmentation_masks"] - segmentation_masks + segmentation_mask_diff = ( + output["segmentation_masks"] - segmentation_masks + ) self.assertNotAllClose(bounding_boxes_diff[0], bounding_boxes_diff[1]) self.assertNotAllClose(keypoints_diff[0], keypoints_diff[1]) - self.assertNotAllClose(segmentation_mask_diff[0], segmentation_mask_diff[1]) + self.assertNotAllClose( + segmentation_mask_diff[0], segmentation_mask_diff[1] + ) diff --git a/keras_cv/layers/preprocessing/with_segmentation_masks_test.py b/keras_cv/layers/preprocessing/with_segmentation_masks_test.py index 80b140b260..ea6e4124bb 100644 --- a/keras_cv/layers/preprocessing/with_segmentation_masks_test.py +++ b/keras_cv/layers/preprocessing/with_segmentation_masks_test.py @@ -61,7 +61,11 @@ preprocessing.RandomGaussianBlur, {"kernel_size": 3, "factor": (0.0, 3.0)}, ), - ("RandomJpegQuality", preprocessing.RandomJpegQuality, {"factor": (75, 100)}), + ( + "RandomJpegQuality", + preprocessing.RandomJpegQuality, + {"factor": (75, 100)}, + ), ("RandomSaturation", preprocessing.RandomSaturation, {"factor": 0.5}), ( "RandomSharpness", @@ -92,12 +96,16 @@ def test_can_run_with_segmentation_masks(self, layer_cls, init_args): # This currently asserts that all layers are no-ops. # When preprocessing layers are updated to mutate segmentation masks, # this condition should only be asserted for no-op layers. 
- self.assertAllClose(inputs["segmentation_masks"], outputs["segmentation_masks"]) + self.assertAllClose( + inputs["segmentation_masks"], outputs["segmentation_masks"] + ) # This has to be a separate test case to exclude CutMix and MixUp # (which are not yet supported for segmentation mask augmentation) @parameterized.named_parameters(*TEST_CONFIGURATIONS) - def test_can_run_with_segmentation_mask_single_image(self, layer_cls, init_args): + def test_can_run_with_segmentation_mask_single_image( + self, layer_cls, init_args + ): classes = 10 layer = layer_cls(**init_args) img = tf.random.uniform( @@ -114,4 +122,6 @@ def test_can_run_with_segmentation_mask_single_image(self, layer_cls, init_args) # This currently asserts that all layers are no-ops. # When preprocessing layers are updated to mutate segmentation masks, # this condition should only be asserted for no-op layers. - self.assertAllClose(inputs["segmentation_masks"], outputs["segmentation_masks"]) + self.assertAllClose( + inputs["segmentation_masks"], outputs["segmentation_masks"] + ) diff --git a/keras_cv/layers/preprocessing_3d/__init__.py b/keras_cv/layers/preprocessing_3d/__init__.py index 5a780b3f89..e18e543168 100644 --- a/keras_cv/layers/preprocessing_3d/__init__.py +++ b/keras_cv/layers/preprocessing_3d/__init__.py @@ -25,8 +25,12 @@ GlobalRandomDroppingPoints, ) from keras_cv.layers.preprocessing_3d.global_random_flip import GlobalRandomFlip -from keras_cv.layers.preprocessing_3d.global_random_rotation import GlobalRandomRotation -from keras_cv.layers.preprocessing_3d.global_random_scaling import GlobalRandomScaling +from keras_cv.layers.preprocessing_3d.global_random_rotation import ( + GlobalRandomRotation, +) +from keras_cv.layers.preprocessing_3d.global_random_scaling import ( + GlobalRandomScaling, +) from keras_cv.layers.preprocessing_3d.global_random_translation import ( GlobalRandomTranslation, ) diff --git a/keras_cv/layers/preprocessing_3d/base_augmentation_layer_3d.py b/keras_cv/layers/preprocessing_3d/base_augmentation_layer_3d.py index 29a84b50dc..f98b038bf7 100644 --- a/keras_cv/layers/preprocessing_3d/base_augmentation_layer_3d.py +++ b/keras_cv/layers/preprocessing_3d/base_augmentation_layer_3d.py @@ -169,7 +169,9 @@ def call(self, inputs, training=True): bounding_boxes = inputs[BOUNDING_BOXES] if point_clouds.shape.rank == 3 and bounding_boxes.shape.rank == 3: return self._augment(inputs) - elif point_clouds.shape.rank == 4 and bounding_boxes.shape.rank == 4: + elif ( + point_clouds.shape.rank == 4 and bounding_boxes.shape.rank == 4 + ): return self._batch_augment(inputs) else: raise ValueError( diff --git a/keras_cv/layers/preprocessing_3d/base_augmentation_layer_3d_test.py b/keras_cv/layers/preprocessing_3d/base_augmentation_layer_3d_test.py index d3be5121f3..74f4ce13a7 100644 --- a/keras_cv/layers/preprocessing_3d/base_augmentation_layer_3d_test.py +++ b/keras_cv/layers/preprocessing_3d/base_augmentation_layer_3d_test.py @@ -36,7 +36,9 @@ def get_random_transformation(self, **kwargs): (), mean=0.0, stddev=self._translate_noise[2] ) - return {"pose": tf.stack([random_x, random_y, random_z, 0, 0, 0], axis=0)} + return { + "pose": tf.stack([random_x, random_y, random_z, 0, 0, 0], axis=0) + } def augment_point_clouds_bounding_boxes( self, point_clouds, bounding_boxes, transformation, **kwargs @@ -51,7 +53,9 @@ def augment_point_clouds_bounding_boxes( ) -class VectorizeDisabledLayer(base_augmentation_layer_3d.BaseAugmentationLayer3D): +class VectorizeDisabledLayer( + 
base_augmentation_layer_3d.BaseAugmentationLayer3D +): def __init__(self, **kwargs): self.auto_vectorize = False super().__init__(**kwargs) diff --git a/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points.py b/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points.py index 0aca6cb64f..1efab73e6f 100644 --- a/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points.py +++ b/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points.py @@ -23,7 +23,9 @@ @tf.keras.utils.register_keras_serializable(package="keras_cv") -class FrustumRandomDroppingPoints(base_augmentation_layer_3d.BaseAugmentationLayer3D): +class FrustumRandomDroppingPoints( + base_augmentation_layer_3d.BaseAugmentationLayer3D +): """A preprocessing layer which randomly drops point within a randomly generated frustum during training. This layer will randomly select a point from the point cloud as the center of a frustum then generate a frustum based @@ -50,20 +52,28 @@ class FrustumRandomDroppingPoints(base_augmentation_layer_3d.BaseAugmentationLay drop_rate: A float scalar sets the probability threshold for dropping the points. """ - def __init__(self, r_distance, theta_width, phi_width, drop_rate=None, **kwargs): + def __init__( + self, r_distance, theta_width, phi_width, drop_rate=None, **kwargs + ): super().__init__(**kwargs) if r_distance < 0: - raise ValueError(f"r_distance must be >=0, but got r_distance={r_distance}") + raise ValueError( + f"r_distance must be >=0, but got r_distance={r_distance}" + ) if theta_width < 0: raise ValueError( f"theta_width must be >=0, but got theta_width={theta_width}" ) if phi_width < 0: - raise ValueError(f"phi_width must be >=0, but got phi_width={phi_width}") + raise ValueError( + f"phi_width must be >=0, but got phi_width={phi_width}" + ) drop_rate = drop_rate if drop_rate else 0.0 if drop_rate > 1: - raise ValueError(f"drop_rate must be <=1, but got drop_rate={drop_rate}") + raise ValueError( + f"drop_rate must be <=1, but got drop_rate={drop_rate}" + ) self._r_distance = r_distance self._theta_width = theta_width diff --git a/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points_test.py b/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points_test.py index 4981653eef..54407c93d6 100644 --- a/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points_test.py +++ b/keras_cv/layers/preprocessing_3d/frustum_random_dropping_points_test.py @@ -44,7 +44,9 @@ def test_not_augment_drop_rate0_point_clouds_and_bounding_boxes(self): outputs = add_layer(inputs) self.assertAllClose(inputs, outputs) - def test_not_augment_drop_rate1_frustum_empty_point_clouds_and_bounding_boxes(self): + def test_not_augment_drop_rate1_frustum_empty_point_clouds_and_bounding_boxes( + self, + ): add_layer = FrustumRandomDroppingPoints( r_distance=10, theta_width=0, phi_width=0, drop_rate=1.0 ) diff --git a/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py b/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py index f40756180a..7c321598a8 100644 --- a/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py +++ b/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise.py @@ -95,7 +95,11 @@ def get_random_transformation(self, point_clouds, **kwargs): point_clouds[0], valid_points, axis=0 )[randomly_select_point_index, :POINTCLOUD_LABEL_INDEX] - num_frames, num_points, num_features = point_clouds.get_shape().as_list() + ( + num_frames, + num_points, + num_features, + ) = 
point_clouds.get_shape().as_list() frustum_mask = [] for f in range(num_frames): frustum_mask.append( @@ -122,7 +126,9 @@ def get_random_transformation(self, point_clouds, **kwargs): ) # Do add feature noise outside the frustum mask. random_point_noise = tf.where(~frustum_mask, 1.0, noise) - random_point_noise = tf.cast(random_point_noise, dtype=self.compute_dtype) + random_point_noise = tf.cast( + random_point_noise, dtype=self.compute_dtype + ) return {"point_noise": random_point_noise} def augment_point_clouds_bounding_boxes( diff --git a/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise_test.py b/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise_test.py index b86dc9673f..29ab15c0b7 100644 --- a/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise_test.py +++ b/keras_cv/layers/preprocessing_3d/frustum_random_point_feature_noise_test.py @@ -44,7 +44,10 @@ def test_augment_point_clouds_and_bounding_boxes(self): def test_augment_specific_point_clouds_and_bounding_boxes(self): tf.keras.utils.set_random_seed(2) add_layer = FrustumRandomPointFeatureNoise( - r_distance=10, theta_width=np.pi, phi_width=1.5 * np.pi, max_noise_level=0.5 + r_distance=10, + theta_width=np.pi, + phi_width=1.5 * np.pi, + max_noise_level=0.5, ) point_clouds = np.array( [ @@ -86,7 +89,10 @@ def test_augment_specific_point_clouds_and_bounding_boxes(self): def test_augment_only_one_valid_point_point_clouds_and_bounding_boxes(self): tf.keras.utils.set_random_seed(2) add_layer = FrustumRandomPointFeatureNoise( - r_distance=10, theta_width=np.pi, phi_width=1.5 * np.pi, max_noise_level=0.5 + r_distance=10, + theta_width=np.pi, + phi_width=1.5 * np.pi, + max_noise_level=0.5, ) point_clouds = np.array( [ diff --git a/keras_cv/layers/preprocessing_3d/global_random_dropping_points.py b/keras_cv/layers/preprocessing_3d/global_random_dropping_points.py index 26d1f11418..bae7469c29 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_dropping_points.py +++ b/keras_cv/layers/preprocessing_3d/global_random_dropping_points.py @@ -21,7 +21,9 @@ @tf.keras.utils.register_keras_serializable(package="keras_cv") -class GlobalRandomDroppingPoints(base_augmentation_layer_3d.BaseAugmentationLayer3D): +class GlobalRandomDroppingPoints( + base_augmentation_layer_3d.BaseAugmentationLayer3D +): """A preprocessing layer which randomly drops point during training. This layer will randomly drop points based on keep_probability. diff --git a/keras_cv/layers/preprocessing_3d/global_random_flip.py b/keras_cv/layers/preprocessing_3d/global_random_flip.py index 93a86f0ce4..8f3c33b8f3 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_flip.py +++ b/keras_cv/layers/preprocessing_3d/global_random_flip.py @@ -69,7 +69,8 @@ def augment_point_clouds_bounding_boxes( ): point_clouds_y = -point_clouds[..., 1:2] point_clouds = tf.concat( - [point_clouds[..., 0:1], point_clouds_y, point_clouds[..., 2:]], axis=-1 + [point_clouds[..., 0:1], point_clouds_y, point_clouds[..., 2:]], + axis=-1, ) # Flip boxes. 
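The box flip that follows uses the same sign convention as the point-cloud flip above: negating the Y coordinate also negates the heading angle. A minimal numpy sketch of that convention (illustrative, not part of this patch), matching the expected values asserted in the flip test further down:

import numpy as np

# One box in CENTER_XYZ_DXDYDZ_PHI order: [x, y, z, dx, dy, dz, phi].
box = np.array([1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 1.0])
flipped = box.copy()
flipped[1] = -flipped[1]   # mirror the center across the XZ plane
flipped[6] = -flipped[6]   # the heading angle changes sign as well
print(flipped)             # [ 1. -1.  2.  3.  4.  5. -1.]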
bounding_boxes_y = -bounding_boxes[ diff --git a/keras_cv/layers/preprocessing_3d/global_random_flip_test.py b/keras_cv/layers/preprocessing_3d/global_random_flip_test.py index d498d7da71..c94baa9228 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_flip_test.py +++ b/keras_cv/layers/preprocessing_3d/global_random_flip_test.py @@ -32,19 +32,21 @@ def test_augment_random_point_clouds_and_bounding_boxes(self): def test_augment_specific_random_point_clouds_and_bounding_boxes(self): add_layer = GlobalRandomFlip() - point_clouds = np.array([[[1, 1, 2, 3, 4, 5, 6, 7, 8, 9]] * 2] * 2).astype( + point_clouds = np.array( + [[[1, 1, 2, 3, 4, 5, 6, 7, 8, 9]] * 2] * 2 + ).astype("float32") + bounding_boxes = np.array([[[1, 1, 2, 3, 4, 5, 1]] * 2] * 2).astype( "float32" ) - bounding_boxes = np.array([[[1, 1, 2, 3, 4, 5, 1]] * 2] * 2).astype("float32") inputs = {POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes} outputs = add_layer(inputs) flipped_point_clouds = np.array( [[[1, -1, 2, 3, 4, 5, 6, 7, 8, 9]] * 2] * 2 ).astype("float32") - flipped_bounding_boxes = np.array([[[1, -1, 2, 3, 4, 5, -1]] * 2] * 2).astype( - "float32" - ) + flipped_bounding_boxes = np.array( + [[[1, -1, 2, 3, 4, 5, -1]] * 2] * 2 + ).astype("float32") self.assertAllClose(outputs[POINT_CLOUDS], flipped_point_clouds) self.assertAllClose(outputs[BOUNDING_BOXES], flipped_bounding_boxes) @@ -57,12 +59,18 @@ def test_augment_batch_point_clouds_and_bounding_boxes(self): self.assertNotAllClose(inputs, outputs) def test_noop_raises_error(self): - with self.assertRaisesRegexp(ValueError, "must flip over at least 1 axis"): + with self.assertRaisesRegexp( + ValueError, "must flip over at least 1 axis" + ): _ = GlobalRandomFlip(flip_x=False, flip_y=False, flip_z=False) def test_flip_x_or_z_raises_error(self): - with self.assertRaisesRegexp(ValueError, "only supports flipping over the Y"): + with self.assertRaisesRegexp( + ValueError, "only supports flipping over the Y" + ): _ = GlobalRandomFlip(flip_x=True, flip_y=False, flip_z=False) - with self.assertRaisesRegexp(ValueError, "only supports flipping over the Y"): + with self.assertRaisesRegexp( + ValueError, "only supports flipping over the Y" + ): _ = GlobalRandomFlip(flip_x=False, flip_y=False, flip_z=True) diff --git a/keras_cv/layers/preprocessing_3d/global_random_rotation.py b/keras_cv/layers/preprocessing_3d/global_random_rotation.py index 6b952634d2..cd2fba8d1d 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_rotation.py +++ b/keras_cv/layers/preprocessing_3d/global_random_rotation.py @@ -60,9 +60,15 @@ def __init__( **kwargs ): super().__init__(**kwargs) - max_rotation_angle_x = max_rotation_angle_x if max_rotation_angle_x else 0.0 - max_rotation_angle_y = max_rotation_angle_y if max_rotation_angle_y else 0.0 - max_rotation_angle_z = max_rotation_angle_z if max_rotation_angle_z else 0.0 + max_rotation_angle_x = ( + max_rotation_angle_x if max_rotation_angle_x else 0.0 + ) + max_rotation_angle_y = ( + max_rotation_angle_y if max_rotation_angle_y else 0.0 + ) + max_rotation_angle_z = ( + max_rotation_angle_z if max_rotation_angle_z else 0.0 + ) if max_rotation_angle_x < 0: raise ValueError("max_rotation_angle_x must be >=0.") @@ -102,7 +108,14 @@ def get_random_transformation(self, **kwargs): ) return { "pose": tf.stack( - [0, 0, 0, random_rotation_z, random_rotation_x, random_rotation_y], + [ + 0, + 0, + 0, + random_rotation_z, + random_rotation_x, + random_rotation_y, + ], axis=0, ) } @@ -112,13 +125,17 @@ def augment_point_clouds_bounding_boxes( ): pose 
= transformation["pose"] point_clouds_xyz = coordinate_transform(point_clouds[..., :3], pose) - point_clouds = tf.concat([point_clouds_xyz, point_clouds[..., 3:]], axis=-1) + point_clouds = tf.concat( + [point_clouds_xyz, point_clouds[..., 3:]], axis=-1 + ) bounding_boxes_xyz = coordinate_transform( bounding_boxes[..., : CENTER_XYZ_DXDYDZ_PHI.Z + 1], pose ) bounding_boxes_heading = wrap_angle_radians( - tf.expand_dims(bounding_boxes[..., CENTER_XYZ_DXDYDZ_PHI.PHI], axis=-1) + tf.expand_dims( + bounding_boxes[..., CENTER_XYZ_DXDYDZ_PHI.PHI], axis=-1 + ) - pose[3] ) bounding_boxes = tf.concat( diff --git a/keras_cv/layers/preprocessing_3d/global_random_rotation_test.py b/keras_cv/layers/preprocessing_3d/global_random_rotation_test.py index 409691cf9b..d2e4eae10d 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_rotation_test.py +++ b/keras_cv/layers/preprocessing_3d/global_random_rotation_test.py @@ -15,7 +15,9 @@ import tensorflow as tf from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.layers.preprocessing_3d.global_random_rotation import GlobalRandomRotation +from keras_cv.layers.preprocessing_3d.global_random_rotation import ( + GlobalRandomRotation, +) POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES @@ -24,7 +26,9 @@ class GlobalRandomRotationTest(tf.test.TestCase): def test_augment_point_clouds_and_bounding_boxes(self): add_layer = GlobalRandomRotation( - max_rotation_angle_x=1.0, max_rotation_angle_y=1.0, max_rotation_angle_z=1.0 + max_rotation_angle_x=1.0, + max_rotation_angle_y=1.0, + max_rotation_angle_z=1.0, ) point_clouds = np.random.random(size=(2, 50, 10)).astype("float32") bounding_boxes = np.random.random(size=(2, 10, 7)).astype("float32") @@ -34,7 +38,9 @@ def test_augment_point_clouds_and_bounding_boxes(self): def test_not_augment_point_clouds_and_bounding_boxes(self): add_layer = GlobalRandomRotation( - max_rotation_angle_x=0.0, max_rotation_angle_y=0.0, max_rotation_angle_z=0.0 + max_rotation_angle_x=0.0, + max_rotation_angle_y=0.0, + max_rotation_angle_z=0.0, ) point_clouds = np.random.random(size=(2, 50, 10)).astype("float32") bounding_boxes = np.random.random(size=(2, 10, 7)).astype("float32") @@ -44,7 +50,9 @@ def test_not_augment_point_clouds_and_bounding_boxes(self): def test_augment_batch_point_clouds_and_bounding_boxes(self): add_layer = GlobalRandomRotation( - max_rotation_angle_x=1.0, max_rotation_angle_y=1.0, max_rotation_angle_z=1.0 + max_rotation_angle_x=1.0, + max_rotation_angle_y=1.0, + max_rotation_angle_z=1.0, ) point_clouds = np.random.random(size=(3, 2, 50, 10)).astype("float32") bounding_boxes = np.random.random(size=(3, 2, 10, 7)).astype("float32") @@ -54,7 +62,9 @@ def test_augment_batch_point_clouds_and_bounding_boxes(self): def test_not_augment_batch_point_clouds_and_bounding_boxes(self): add_layer = GlobalRandomRotation( - max_rotation_angle_x=0.0, max_rotation_angle_y=0.0, max_rotation_angle_z=0.0 + max_rotation_angle_x=0.0, + max_rotation_angle_y=0.0, + max_rotation_angle_z=0.0, ) point_clouds = np.random.random(size=(3, 2, 50, 10)).astype("float32") bounding_boxes = np.random.random(size=(3, 2, 10, 7)).astype("float32") diff --git a/keras_cv/layers/preprocessing_3d/global_random_scaling.py b/keras_cv/layers/preprocessing_3d/global_random_scaling.py index 9561e4ee5e..b51213642c 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_scaling.py +++ b/keras_cv/layers/preprocessing_3d/global_random_scaling.py @@ -173,7 +173,9 @@ def 
augment_point_clouds_bounding_boxes( ): scale = transformation["scale"][tf.newaxis, tf.newaxis, :] point_clouds_xyz = point_clouds[..., :3] * scale - point_clouds = tf.concat([point_clouds_xyz, point_clouds[..., 3:]], axis=-1) + point_clouds = tf.concat( + [point_clouds_xyz, point_clouds[..., 3:]], axis=-1 + ) bounding_boxes_xyzdxdydz = bounding_boxes[ ..., : CENTER_XYZ_DXDYDZ_PHI.DZ + 1 diff --git a/keras_cv/layers/preprocessing_3d/global_random_scaling_test.py b/keras_cv/layers/preprocessing_3d/global_random_scaling_test.py index e735539596..c8aa65b774 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_scaling_test.py +++ b/keras_cv/layers/preprocessing_3d/global_random_scaling_test.py @@ -15,7 +15,9 @@ import tensorflow as tf from keras_cv.layers.preprocessing_3d import base_augmentation_layer_3d -from keras_cv.layers.preprocessing_3d.global_random_scaling import GlobalRandomScaling +from keras_cv.layers.preprocessing_3d.global_random_scaling import ( + GlobalRandomScaling, +) POINT_CLOUDS = base_augmentation_layer_3d.POINT_CLOUDS BOUNDING_BOXES = base_augmentation_layer_3d.BOUNDING_BOXES @@ -65,18 +67,20 @@ def test_2x_scaling_point_clouds_and_bounding_boxes(self): y_factor=(2.0, 2.0), z_factor=(2.0, 2.0), ) - point_clouds = np.array([[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]] * 2] * 2).astype( + point_clouds = np.array( + [[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]] * 2] * 2 + ).astype("float32") + bounding_boxes = np.array([[[0, 1, 2, 3, 4, 5, 6]] * 2] * 2).astype( "float32" ) - bounding_boxes = np.array([[[0, 1, 2, 3, 4, 5, 6]] * 2] * 2).astype("float32") inputs = {POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes} outputs = add_layer(inputs) scaled_point_clouds = np.array( [[[0, 2, 4, 3, 4, 5, 6, 7, 8, 9]] * 2] * 2 ).astype("float32") - scaled_bounding_boxes = np.array([[[0, 2, 4, 6, 8, 10, 6]] * 2] * 2).astype( - "float32" - ) + scaled_bounding_boxes = np.array( + [[[0, 2, 4, 6, 8, 10, 6]] * 2] * 2 + ).astype("float32") self.assertAllClose(outputs[POINT_CLOUDS], scaled_point_clouds) self.assertAllClose(outputs[BOUNDING_BOXES], scaled_bounding_boxes) diff --git a/keras_cv/layers/preprocessing_3d/global_random_translation.py b/keras_cv/layers/preprocessing_3d/global_random_translation.py index 5509f0a13e..9385aa42f1 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_translation.py +++ b/keras_cv/layers/preprocessing_3d/global_random_translation.py @@ -23,7 +23,9 @@ @tf.keras.utils.register_keras_serializable(package="keras_cv") -class GlobalRandomTranslation(base_augmentation_layer_3d.BaseAugmentationLayer3D): +class GlobalRandomTranslation( + base_augmentation_layer_3d.BaseAugmentationLayer3D +): """A preprocessing layer which randomly translates point clouds and bounding boxes along X, Y, and Z axes during training. 
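A minimal usage sketch for this layer (illustrative only; the import path matches the module being patched, and the dict keys are assumed to match the POINT_CLOUDS and BOUNDING_BOXES constants, i.e. "point_clouds" and "bounding_boxes"), using the same shapes as the tests that follow:

import numpy as np
from keras_cv.layers.preprocessing_3d.global_random_translation import (
    GlobalRandomTranslation,
)

layer = GlobalRandomTranslation(x_stddev=1.0, y_stddev=1.0, z_stddev=1.0)
point_clouds = np.random.random(size=(2, 50, 10)).astype("float32")
bounding_boxes = np.random.random(size=(2, 10, 7)).astype("float32")
outputs = layer(
    {"point_clouds": point_clouds, "bounding_boxes": bounding_boxes}
)
# A single random offset is drawn per call and applied to both the point
# coordinates and the box centers, so the two stay consistent.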
@@ -101,13 +103,18 @@ def augment_point_clouds_bounding_boxes( ): pose = transformation["pose"] point_clouds_xyz = coordinate_transform(point_clouds[..., :3], pose) - point_clouds = tf.concat([point_clouds_xyz, point_clouds[..., 3:]], axis=-1) + point_clouds = tf.concat( + [point_clouds_xyz, point_clouds[..., 3:]], axis=-1 + ) bounding_boxes_xyz = coordinate_transform( bounding_boxes[..., : CENTER_XYZ_DXDYDZ_PHI.Z + 1], pose ) bounding_boxes = tf.concat( - [bounding_boxes_xyz, bounding_boxes[..., CENTER_XYZ_DXDYDZ_PHI.DX :]], + [ + bounding_boxes_xyz, + bounding_boxes[..., CENTER_XYZ_DXDYDZ_PHI.DX :], + ], axis=-1, ) diff --git a/keras_cv/layers/preprocessing_3d/global_random_translation_test.py b/keras_cv/layers/preprocessing_3d/global_random_translation_test.py index dc7e766e26..f8f96346dc 100644 --- a/keras_cv/layers/preprocessing_3d/global_random_translation_test.py +++ b/keras_cv/layers/preprocessing_3d/global_random_translation_test.py @@ -25,7 +25,9 @@ class GlobalRandomTranslationTest(tf.test.TestCase): def test_augment_point_clouds_and_bounding_boxes(self): - add_layer = GlobalRandomTranslation(x_stddev=1.0, y_stddev=1.0, z_stddev=1.0) + add_layer = GlobalRandomTranslation( + x_stddev=1.0, y_stddev=1.0, z_stddev=1.0 + ) point_clouds = np.random.random(size=(2, 50, 10)).astype("float32") bounding_boxes = np.random.random(size=(2, 10, 7)).astype("float32") inputs = {POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes} @@ -33,7 +35,9 @@ def test_augment_point_clouds_and_bounding_boxes(self): self.assertNotAllClose(inputs, outputs) def test_not_augment_point_clouds_and_bounding_boxes(self): - add_layer = GlobalRandomTranslation(x_stddev=0.0, y_stddev=0.0, z_stddev=0.0) + add_layer = GlobalRandomTranslation( + x_stddev=0.0, y_stddev=0.0, z_stddev=0.0 + ) point_clouds = np.random.random(size=(2, 50, 10)).astype("float32") bounding_boxes = np.random.random(size=(2, 10, 7)).astype("float32") inputs = {POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes} @@ -41,7 +45,9 @@ def test_not_augment_point_clouds_and_bounding_boxes(self): self.assertAllClose(inputs, outputs) def test_augment_batch_point_clouds_and_bounding_boxes(self): - add_layer = GlobalRandomTranslation(x_stddev=1.0, y_stddev=1.0, z_stddev=1.0) + add_layer = GlobalRandomTranslation( + x_stddev=1.0, y_stddev=1.0, z_stddev=1.0 + ) point_clouds = np.random.random(size=(3, 2, 50, 10)).astype("float32") bounding_boxes = np.random.random(size=(3, 2, 10, 7)).astype("float32") inputs = {POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes} @@ -49,7 +55,9 @@ def test_augment_batch_point_clouds_and_bounding_boxes(self): self.assertNotAllClose(inputs, outputs) def test_not_augment_batch_point_clouds_and_bounding_boxes(self): - add_layer = GlobalRandomTranslation(x_stddev=0.0, y_stddev=0.0, z_stddev=0.0) + add_layer = GlobalRandomTranslation( + x_stddev=0.0, y_stddev=0.0, z_stddev=0.0 + ) point_clouds = np.random.random(size=(3, 2, 50, 10)).astype("float32") bounding_boxes = np.random.random(size=(3, 2, 10, 7)).astype("float32") inputs = {POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes} diff --git a/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes.py b/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes.py index 109407d1eb..dc6368bbd0 100644 --- a/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes.py +++ b/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes.py @@ -26,7 +26,9 @@ @tf.keras.utils.register_keras_serializable(package="keras_cv") -class 
GroupPointsByBoundingBoxes(base_augmentation_layer_3d.BaseAugmentationLayer3D): +class GroupPointsByBoundingBoxes( + base_augmentation_layer_3d.BaseAugmentationLayer3D +): """A preprocessing layer which groups point clouds based on bounding boxes during training. This layer will group point clouds based on bounding boxes and generate OBJECT_POINT_CLOUDS and OBJECT_BOUNDING_BOXES tensors. @@ -93,7 +95,8 @@ def augment_point_clouds_bounding_boxes( ): if self._label_index: bounding_boxes_mask = ( - bounding_boxes[0, :, CENTER_XYZ_DXDYDZ_PHI.CLASS] == self._label_index + bounding_boxes[0, :, CENTER_XYZ_DXDYDZ_PHI.CLASS] + == self._label_index ) object_bounding_boxes = tf.boolean_mask( bounding_boxes, bounding_boxes_mask, axis=1 @@ -112,7 +115,9 @@ def augment_point_clouds_bounding_boxes( # Filter bounding boxes using the current frame. # [num_boxes] min_points_filter = ( - tf.reduce_sum(tf.cast(points_in_bounding_boxes[0], dtype=tf.int32), axis=0) + tf.reduce_sum( + tf.cast(points_in_bounding_boxes[0], dtype=tf.int32), axis=0 + ) >= self._min_points_per_bounding_boxes ) @@ -124,7 +129,9 @@ def augment_point_clouds_bounding_boxes( points_in_bounding_boxes, min_points_filter, axis=2 ) # [num of frames, num of boxes, num of points]. - points_in_bounding_boxes = tf.transpose(points_in_bounding_boxes, [0, 2, 1]) + points_in_bounding_boxes = tf.transpose( + points_in_bounding_boxes, [0, 2, 1] + ) points_in_bounding_boxes = tf.cast(points_in_bounding_boxes, tf.int32) sort_valid_index = tf.argsort( points_in_bounding_boxes, axis=-1, direction="DESCENDING" @@ -156,7 +163,8 @@ def augment_point_clouds_bounding_boxes_v2( ): if self._label_index: bounding_boxes_mask = ( - bounding_boxes[0, :, CENTER_XYZ_DXDYDZ_PHI.CLASS] == self._label_index + bounding_boxes[0, :, CENTER_XYZ_DXDYDZ_PHI.CLASS] + == self._label_index ) object_bounding_boxes = tf.boolean_mask( bounding_boxes, bounding_boxes_mask, axis=1 @@ -228,7 +236,9 @@ def call(self, inputs, training=True): bounding_boxes = inputs[BOUNDING_BOXES] if point_clouds.shape.rank == 3 and bounding_boxes.shape.rank == 3: return self._augment(inputs) - elif point_clouds.shape.rank == 4 and bounding_boxes.shape.rank == 4: + elif ( + point_clouds.shape.rank == 4 and bounding_boxes.shape.rank == 4 + ): batch = point_clouds.get_shape().as_list()[0] object_point_clouds_list = [] object_bounding_boxes_list = [] diff --git a/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes_test.py b/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes_test.py index fd987f95aa..587f9e2e3f 100644 --- a/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes_test.py +++ b/keras_cv/layers/preprocessing_3d/group_points_by_bounding_boxes_test.py @@ -36,7 +36,14 @@ def test_augment_point_clouds_and_bounding_boxes(self): max_points_per_bounding_boxes=2, ) point_clouds = np.array( - [[[0, 1, 2, 3, 4], [10, 1, 2, 3, 4], [0, -1, 2, 3, 4], [100, 100, 2, 3, 4]]] + [ + [ + [0, 1, 2, 3, 4], + [10, 1, 2, 3, 4], + [0, -1, 2, 3, 4], + [100, 100, 2, 3, 4], + ] + ] * 2 ).astype("float32") bounding_boxes = np.array( @@ -56,7 +63,12 @@ def test_augment_point_clouds_and_bounding_boxes(self): } outputs = add_layer(inputs) object_point_clouds = np.array( - [[[[0, 1, 2, 3, 4], [0, -1, 2, 3, 4]], [[10, 1, 2, 3, 4], [0, 0, 0, 0, 0]]]] + [ + [ + [[0, 1, 2, 3, 4], [0, -1, 2, 3, 4]], + [[10, 1, 2, 3, 4], [0, 0, 0, 0, 0]], + ] + ] * 2 ).astype("float32") object_bounding_boxes = np.array( @@ -66,10 +78,14 @@ def test_augment_point_clouds_and_bounding_boxes(self): 
self.assertAllClose(inputs[BOUNDING_BOXES], outputs[BOUNDING_BOXES]) self.assertAllClose(inputs["dummy_item"], outputs["dummy_item"]) # Sort the point clouds due to the orders of points are different when using Tensorflow and Metal+Tensorflow (MAC). - outputs[OBJECT_POINT_CLOUDS] = tf.sort(outputs[OBJECT_POINT_CLOUDS], axis=-2) + outputs[OBJECT_POINT_CLOUDS] = tf.sort( + outputs[OBJECT_POINT_CLOUDS], axis=-2 + ) object_point_clouds = tf.sort(object_point_clouds, axis=-2) self.assertAllClose(outputs[OBJECT_POINT_CLOUDS], object_point_clouds) - self.assertAllClose(outputs[OBJECT_BOUNDING_BOXES], object_bounding_boxes) + self.assertAllClose( + outputs[OBJECT_BOUNDING_BOXES], object_bounding_boxes + ) def test_not_augment_point_clouds_and_bounding_boxes(self): add_layer = GroupPointsByBoundingBoxes( @@ -78,7 +94,14 @@ def test_not_augment_point_clouds_and_bounding_boxes(self): max_points_per_bounding_boxes=2, ) point_clouds = np.array( - [[[0, 1, 2, 3, 4], [10, 1, 2, 3, 4], [0, -1, 2, 3, 4], [100, 100, 2, 3, 4]]] + [ + [ + [0, 1, 2, 3, 4], + [10, 1, 2, 3, 4], + [0, -1, 2, 3, 4], + [100, 100, 2, 3, 4], + ] + ] * 2 ).astype("float32") bounding_boxes = np.array( @@ -149,13 +172,18 @@ def test_augment_batch_point_clouds_and_bounding_boxes(self): self.assertAllClose(inputs[POINT_CLOUDS], outputs[POINT_CLOUDS]) self.assertAllClose(inputs[BOUNDING_BOXES], outputs[BOUNDING_BOXES]) # Sort the point clouds due to the orders of points are different when using Tensorflow and Metal+Tensorflow (MAC). - outputs[OBJECT_POINT_CLOUDS] = tf.sort(outputs[OBJECT_POINT_CLOUDS], axis=-2) + outputs[OBJECT_POINT_CLOUDS] = tf.sort( + outputs[OBJECT_POINT_CLOUDS], axis=-2 + ) object_point_clouds = tf.sort(object_point_clouds, axis=-2) self.assertAllClose(outputs[OBJECT_POINT_CLOUDS], object_point_clouds) - self.assertAllClose(outputs[OBJECT_BOUNDING_BOXES], object_bounding_boxes) + self.assertAllClose( + outputs[OBJECT_BOUNDING_BOXES], object_bounding_boxes + ) @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_augment_point_clouds_and_bounding_boxes_v2(self): @@ -165,7 +193,14 @@ def test_augment_point_clouds_and_bounding_boxes_v2(self): max_points_per_bounding_boxes=2, ) point_clouds = np.array( - [[[0, 1, 2, 3, 4], [10, 1, 2, 3, 4], [0, -1, 2, 3, 4], [100, 100, 2, 3, 4]]] + [ + [ + [0, 1, 2, 3, 4], + [10, 1, 2, 3, 4], + [0, -1, 2, 3, 4], + [100, 100, 2, 3, 4], + ] + ] * 2 ).astype("float32") bounding_boxes = np.array( @@ -185,7 +220,12 @@ def test_augment_point_clouds_and_bounding_boxes_v2(self): ) object_point_clouds, object_bounding_boxes = outputs[0], outputs[1] expected_object_point_clouds = np.array( - [[[[0, 1, 2, 3, 4], [0, -1, 2, 3, 4]], [[10, 1, 2, 3, 4], [0, 0, 0, 0, 0]]]] + [ + [ + [[0, 1, 2, 3, 4], [0, -1, 2, 3, 4]], + [[10, 1, 2, 3, 4], [0, 0, 0, 0, 0]], + ] + ] * 2 ).astype("float32") expected_object_bounding_boxes = np.array( diff --git a/keras_cv/layers/preprocessing_3d/random_copy_paste.py b/keras_cv/layers/preprocessing_3d/random_copy_paste.py index 2f6512e8ca..4894c4cf31 100644 --- a/keras_cv/layers/preprocessing_3d/random_copy_paste.py +++ b/keras_cv/layers/preprocessing_3d/random_copy_paste.py @@ -103,7 +103,9 @@ def get_random_transformation( minval=self._min_paste_bounding_boxes, maxval=self._max_paste_bounding_boxes, ) - num_paste_bounding_boxes = tf.cast(num_paste_bounding_boxes, dtype=tf.int32) + 
num_paste_bounding_boxes = tf.cast( + num_paste_bounding_boxes, dtype=tf.int32 + ) num_existing_bounding_boxes = tf.shape(bounding_boxes)[1] if self._label_index: object_mask = ( @@ -118,8 +120,12 @@ def get_random_transformation( ) shuffle_index = tf.range(tf.shape(object_point_clouds)[1]) shuffle_index = tf.random.shuffle(shuffle_index) - object_point_clouds = tf.gather(object_point_clouds, shuffle_index, axis=1) - object_bounding_boxes = tf.gather(object_bounding_boxes, shuffle_index, axis=1) + object_point_clouds = tf.gather( + object_point_clouds, shuffle_index, axis=1 + ) + object_bounding_boxes = tf.gather( + object_bounding_boxes, shuffle_index, axis=1 + ) # Load at most 5 times num_paste_bounding_boxes to check overlaps. num_compare_bounding_boxes = tf.math.minimum( @@ -127,12 +133,16 @@ def get_random_transformation( tf.shape(object_point_clouds)[1], ) - object_point_clouds = object_point_clouds[:, :num_compare_bounding_boxes, :] - object_bounding_boxes = object_bounding_boxes[:, :num_compare_bounding_boxes, :] - # Use the current frame to check overlap between existing bounding boxes and pasted bounding boxes - all_bounding_boxes = tf.concat([bounding_boxes, object_bounding_boxes], axis=1)[ - 0, :, :7 + object_point_clouds = object_point_clouds[ + :, :num_compare_bounding_boxes, : + ] + object_bounding_boxes = object_bounding_boxes[ + :, :num_compare_bounding_boxes, : ] + # Use the current frame to check overlap between existing bounding boxes and pasted bounding boxes + all_bounding_boxes = tf.concat( + [bounding_boxes, object_bounding_boxes], axis=1 + )[0, :, :7] iou = iou_3d(all_bounding_boxes, all_bounding_boxes) iou = tf.linalg.band_part(iou, -1, 0) iou_sum = tf.reduce_sum(iou[num_existing_bounding_boxes:], axis=1) @@ -144,8 +154,12 @@ def get_random_transformation( object_bounding_boxes = tf.boolean_mask( object_bounding_boxes, non_overlapping_mask, axis=1 ) - object_point_clouds = object_point_clouds[:, :num_paste_bounding_boxes, :] - object_bounding_boxes = object_bounding_boxes[:, :num_paste_bounding_boxes, :] + object_point_clouds = object_point_clouds[ + :, :num_paste_bounding_boxes, : + ] + object_bounding_boxes = object_bounding_boxes[ + :, :num_paste_bounding_boxes, : + ] return { OBJECT_POINT_CLOUDS: object_point_clouds, OBJECT_BOUNDING_BOXES: object_bounding_boxes, @@ -186,7 +200,8 @@ def augment_point_clouds_bounding_boxes( existing_bounding_boxes = tf.boolean_mask( bounding_boxes[frame_index], tf.math.greater( - bounding_boxes[frame_index, :, CENTER_XYZ_DXDYDZ_PHI.CLASS], 0.0 + bounding_boxes[frame_index, :, CENTER_XYZ_DXDYDZ_PHI.CLASS], + 0.0, ), ) paste_bounding_boxes = tf.boolean_mask( @@ -200,7 +215,9 @@ def augment_point_clouds_bounding_boxes( axis=0, ) bounding_boxes_list += [ - tf.concat([paste_bounding_boxes, existing_bounding_boxes], axis=0) + tf.concat( + [paste_bounding_boxes, existing_bounding_boxes], axis=0 + ) ] point_clouds = tf.ragged.stack(point_clouds_list) @@ -228,7 +245,9 @@ def _augment(self, inputs): bounding_boxes=bounding_boxes, transformation=transformation, ) - result.update({POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes}) + result.update( + {POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes} + ) return result def call(self, inputs, training=True): @@ -237,7 +256,9 @@ def call(self, inputs, training=True): bounding_boxes = inputs[BOUNDING_BOXES] if point_clouds.shape.rank == 3 and bounding_boxes.shape.rank == 3: return self._augment(inputs) - elif point_clouds.shape.rank == 4 and bounding_boxes.shape.rank == 4: + 
elif ( + point_clouds.shape.rank == 4 and bounding_boxes.shape.rank == 4 + ): batch = point_clouds.get_shape().as_list()[0] point_clouds_list = [] bounding_boxes_list = [] diff --git a/keras_cv/layers/preprocessing_3d/random_copy_paste_test.py b/keras_cv/layers/preprocessing_3d/random_copy_paste_test.py index 21b0561ad3..d10a0ef689 100644 --- a/keras_cv/layers/preprocessing_3d/random_copy_paste_test.py +++ b/keras_cv/layers/preprocessing_3d/random_copy_paste_test.py @@ -28,7 +28,8 @@ class RandomCopyPasteTest(tf.test.TestCase): @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_augment_point_clouds_and_bounding_boxes(self): @@ -118,7 +119,9 @@ def test_augment_point_clouds_and_bounding_boxes(self): ] * 2 ).astype("float32") - self.assertAllClose(inputs[OBJECT_POINT_CLOUDS], outputs[OBJECT_POINT_CLOUDS]) + self.assertAllClose( + inputs[OBJECT_POINT_CLOUDS], outputs[OBJECT_POINT_CLOUDS] + ) self.assertAllClose( inputs[OBJECT_BOUNDING_BOXES], outputs[OBJECT_BOUNDING_BOXES] ) @@ -126,7 +129,8 @@ def test_augment_point_clouds_and_bounding_boxes(self): self.assertAllClose(outputs[BOUNDING_BOXES], augmented_bounding_boxes) @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_augment_batch_point_clouds_and_bounding_boxes(self): @@ -228,7 +232,9 @@ def test_augment_batch_point_clouds_and_bounding_boxes(self): ] * 3 ).astype("float32") - self.assertAllClose(inputs[OBJECT_POINT_CLOUDS], outputs[OBJECT_POINT_CLOUDS]) + self.assertAllClose( + inputs[OBJECT_POINT_CLOUDS], outputs[OBJECT_POINT_CLOUDS] + ) self.assertAllClose( inputs[OBJECT_BOUNDING_BOXES], outputs[OBJECT_BOUNDING_BOXES] ) diff --git a/keras_cv/layers/preprocessing_3d/random_drop_box.py b/keras_cv/layers/preprocessing_3d/random_drop_box.py index 3b1d961e01..23201e7d5f 100644 --- a/keras_cv/layers/preprocessing_3d/random_drop_box.py +++ b/keras_cv/layers/preprocessing_3d/random_drop_box.py @@ -86,7 +86,9 @@ def get_random_transformation(self, point_clouds, bounding_boxes, **kwargs): ) # Randomly remove max_drop_bounding_boxes number of bounding boxes. num_bounding_boxes = bounding_boxes.get_shape().as_list()[1] - random_scores_for_selected_boxes = tf.random.uniform(shape=[num_bounding_boxes]) + random_scores_for_selected_boxes = tf.random.uniform( + shape=[num_bounding_boxes] + ) random_scores_for_selected_boxes = tf.where( selected_boxes_mask, random_scores_for_selected_boxes, 0.0 ) diff --git a/keras_cv/layers/preprocessing_3d/swap_background.py b/keras_cv/layers/preprocessing_3d/swap_background.py index 227242e874..4aa3c18290 100644 --- a/keras_cv/layers/preprocessing_3d/swap_background.py +++ b/keras_cv/layers/preprocessing_3d/swap_background.py @@ -86,7 +86,9 @@ def get_random_transformation( bounding_boxes[..., : CENTER_XYZ_DXDYDZ_PHI.CLASS], keepdims=True, ) - point_clouds = tf.where(~objects_points_in_point_clouds, point_clouds, 0.0) + point_clouds = tf.where( + ~objects_points_in_point_clouds, point_clouds, 0.0 + ) # Extract objects from additional_point_clouds. 
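The tf.where call above removes the original foreground by zeroing every point that falls inside an object box, leaving only background points. A minimal sketch of that masking pattern (the membership mask here is hypothetical and hard-coded for illustration; in the layer it comes from is_within_any_box3d):

import tensorflow as tf

point_clouds = tf.random.uniform((1, 6, 4))
# Hypothetical per-point mask: True where a point lies inside any object box.
in_object = tf.constant([[True, False, True, False, False, True]])
background_only = tf.where(in_object[..., tf.newaxis], 0.0, point_clouds)
# Points inside boxes are zeroed out; background points are left untouched.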
objects_points_in_additional_point_clouds = is_within_any_box3d( @@ -95,7 +97,9 @@ def get_random_transformation( keepdims=True, ) additional_point_clouds = tf.where( - objects_points_in_additional_point_clouds, additional_point_clouds, 0.0 + objects_points_in_additional_point_clouds, + additional_point_clouds, + 0.0, ) # Remove backgorund points in point_clouds overlaps with additional_bounding_boxes. @@ -130,12 +134,15 @@ def augment_point_clouds_bounding_boxes( ) object_point_clouds = tf.boolean_mask( additional_point_clouds[frame_index], - additional_point_clouds[frame_index, :, POINTCLOUD_LABEL_INDEX] > 0, + additional_point_clouds[frame_index, :, POINTCLOUD_LABEL_INDEX] + > 0, axis=0, ) point_clouds_list += [ - tf.concat([object_point_clouds, background_point_clouds], axis=0) + tf.concat( + [object_point_clouds, background_point_clouds], axis=0 + ) ] point_clouds = tf.ragged.stack(point_clouds_list) @@ -165,5 +172,7 @@ def _augment(self, inputs): bounding_boxes=bounding_boxes, transformation=transformation, ) - result.update({POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes}) + result.update( + {POINT_CLOUDS: point_clouds, BOUNDING_BOXES: bounding_boxes} + ) return result diff --git a/keras_cv/layers/preprocessing_3d/swap_background_test.py b/keras_cv/layers/preprocessing_3d/swap_background_test.py index fce86e4be9..5adc8ca1c1 100644 --- a/keras_cv/layers/preprocessing_3d/swap_background_test.py +++ b/keras_cv/layers/preprocessing_3d/swap_background_test.py @@ -124,7 +124,8 @@ def test_augment_point_clouds_and_bounding_boxes(self): inputs[ADDITIONAL_POINT_CLOUDS], outputs[ADDITIONAL_POINT_CLOUDS] ) self.assertAllClose( - inputs[ADDITIONAL_BOUNDING_BOXES], outputs[ADDITIONAL_BOUNDING_BOXES] + inputs[ADDITIONAL_BOUNDING_BOXES], + outputs[ADDITIONAL_BOUNDING_BOXES], ) self.assertAllClose(outputs[POINT_CLOUDS], augmented_point_clouds) self.assertAllClose(outputs[BOUNDING_BOXES], augmented_bounding_boxes) @@ -247,7 +248,8 @@ def test_augment_batch_point_clouds_and_bounding_boxes(self): inputs[ADDITIONAL_POINT_CLOUDS], outputs[ADDITIONAL_POINT_CLOUDS] ) self.assertAllClose( - inputs[ADDITIONAL_BOUNDING_BOXES], outputs[ADDITIONAL_BOUNDING_BOXES] + inputs[ADDITIONAL_BOUNDING_BOXES], + outputs[ADDITIONAL_BOUNDING_BOXES], ) self.assertAllClose(outputs[POINT_CLOUDS], augmented_point_clouds) self.assertAllClose(outputs[BOUNDING_BOXES], augmented_bounding_boxes) diff --git a/keras_cv/layers/regularization/dropblock_2d.py b/keras_cv/layers/regularization/dropblock_2d.py index 0410112407..b8b631e8cc 100644 --- a/keras_cv/layers/regularization/dropblock_2d.py +++ b/keras_cv/layers/regularization/dropblock_2d.py @@ -146,7 +146,10 @@ def __init__( ) self._rate = rate - self._dropblock_height, self._dropblock_width = conv_utils.normalize_tuple( + ( + self._dropblock_height, + self._dropblock_width, + ) = conv_utils.normalize_tuple( value=block_size, n=2, name="block_size", allow_zero=False ) self.seed = seed @@ -209,10 +212,12 @@ def call(self, x, training=None): ) # Slightly scale the values, to account for magnitude change - percent_ones = tf.cast(tf.reduce_sum(block_pattern), tf.float32) / tf.cast( - tf.size(block_pattern), tf.float32 + percent_ones = tf.cast( + tf.reduce_sum(block_pattern), tf.float32 + ) / tf.cast(tf.size(block_pattern), tf.float32) + return ( + x / tf.cast(percent_ones, x.dtype) * tf.cast(block_pattern, x.dtype) ) - return x / tf.cast(percent_ones, x.dtype) * tf.cast(block_pattern, x.dtype) def get_config(self): config = { diff --git 
a/keras_cv/layers/regularization/squeeze_excite.py b/keras_cv/layers/regularization/squeeze_excite.py index 261a7c7f91..03f836ceb9 100644 --- a/keras_cv/layers/regularization/squeeze_excite.py +++ b/keras_cv/layers/regularization/squeeze_excite.py @@ -68,10 +68,14 @@ def __init__( self.filters = filters if ratio <= 0.0 or ratio >= 1.0: - raise ValueError(f"`ratio` should be a float between 0 and 1. Got {ratio}") + raise ValueError( + f"`ratio` should be a float between 0 and 1. Got {ratio}" + ) if filters <= 0 or not isinstance(filters, int): - raise ValueError(f"`filters` should be a positive integer. Got {filters}") + raise ValueError( + f"`filters` should be a positive integer. Got {filters}" + ) self.ratio = ratio self.bottleneck_filters = int(self.filters * self.ratio) @@ -109,11 +113,15 @@ def get_config(self): @classmethod def from_config(cls, config): if isinstance(config["squeeze_activation"], dict): - config["squeeze_activation"] = tf.keras.utils.deserialize_keras_object( + config[ + "squeeze_activation" + ] = tf.keras.utils.deserialize_keras_object( config["squeeze_activation"] ) if isinstance(config["excite_activation"], dict): - config["excite_activation"] = tf.keras.utils.deserialize_keras_object( + config[ + "excite_activation" + ] = tf.keras.utils.deserialize_keras_object( config["excite_activation"] ) return cls(**config) diff --git a/keras_cv/layers/regularization/squeeze_excite_test.py b/keras_cv/layers/regularization/squeeze_excite_test.py index 21ccc5ddff..1d55f7b989 100644 --- a/keras_cv/layers/regularization/squeeze_excite_test.py +++ b/keras_cv/layers/regularization/squeeze_excite_test.py @@ -44,7 +44,8 @@ def custom_activation(x): def test_raises_invalid_ratio_error(self): with self.assertRaisesRegex( - ValueError, "`ratio` should be a float" " between 0 and 1. Got (.*?)" + ValueError, + "`ratio` should be a float" " between 0 and 1. Got (.*?)", ): _ = SqueezeAndExcite2D(8, ratio=1.1) diff --git a/keras_cv/layers/regularization/stochastic_depth_test.py b/keras_cv/layers/regularization/stochastic_depth_test.py index f66b092e75..e8aab9fa1f 100644 --- a/keras_cv/layers/regularization/stochastic_depth_test.py +++ b/keras_cv/layers/regularization/stochastic_depth_test.py @@ -24,7 +24,8 @@ def test_inputs_have_two_elements(self): inputs = [inputs, inputs, inputs] with self.assertRaisesRegex( - ValueError, "Input must be a list of length 2. " "Got input with length=3." + ValueError, + "Input must be a list of length 2. 
" "Got input with length=3.", ): StochasticDepth()(inputs) diff --git a/keras_cv/layers/serialization_test.py b/keras_cv/layers/serialization_test.py index 3fbd1c79f8..0e98fc85ff 100644 --- a/keras_cv/layers/serialization_test.py +++ b/keras_cv/layers/serialization_test.py @@ -408,6 +408,8 @@ def assertAllInitParametersAreInConfig(self, layer_cls, config): if v not in excluded_name } - intersection_with_config = {v for v in config.keys() if v in parameter_names} + intersection_with_config = { + v for v in config.keys() if v in parameter_names + } self.assertSetEqual(parameter_names, intersection_with_config) diff --git a/keras_cv/layers/spatial_pyramid.py b/keras_cv/layers/spatial_pyramid.py index 6114168e4b..8e4df5acdd 100644 --- a/keras_cv/layers/spatial_pyramid.py +++ b/keras_cv/layers/spatial_pyramid.py @@ -117,7 +117,9 @@ def build(self, input_shape): ), tf.keras.layers.BatchNormalization(), tf.keras.layers.Activation(self.activation), - tf.keras.layers.Resizing(height, width, interpolation="bilinear"), + tf.keras.layers.Resizing( + height, width, interpolation="bilinear" + ), ] ) self.aspp_parallel_channels.append(pool_sequential) diff --git a/keras_cv/layers/transformer_encoder.py b/keras_cv/layers/transformer_encoder.py index 87465921d1..826c398d40 100644 --- a/keras_cv/layers/transformer_encoder.py +++ b/keras_cv/layers/transformer_encoder.py @@ -67,8 +67,12 @@ def __init__( self.layer_norm_epsilon = layer_norm_epsilon self.mlp_units = [mlp_dim, project_dim] - self.layer_norm1 = layers.LayerNormalization(epsilon=self.layer_norm_epsilon) - self.layer_norm2 = layers.LayerNormalization(epsilon=self.layer_norm_epsilon) + self.layer_norm1 = layers.LayerNormalization( + epsilon=self.layer_norm_epsilon + ) + self.layer_norm2 = layers.LayerNormalization( + epsilon=self.layer_norm_epsilon + ) self.attn = layers.MultiHeadAttention( num_heads=self.num_heads, key_dim=self.project_dim // self.num_heads, diff --git a/keras_cv/layers/vit_layers.py b/keras_cv/layers/vit_layers.py index 6df0a0cb90..59a1116bbb 100644 --- a/keras_cv/layers/vit_layers.py +++ b/keras_cv/layers/vit_layers.py @@ -77,7 +77,10 @@ def build(self, input_shape): shape=[1, 1, self.project_dim], name="class_token", trainable=True ) self.num_patches = ( - input_shape[1] // self.patch_size * input_shape[2] // self.patch_size + input_shape[1] + // self.patch_size + * input_shape[2] + // self.patch_size ) self.position_embedding = layers.Embedding( input_dim=self.num_patches + 1, output_dim=self.project_dim @@ -123,7 +126,9 @@ def call( ), dtype=patches_flattened.dtype, ) - patches_flattened = tf.concat([class_token_broadcast, patches_flattened], 1) + patches_flattened = tf.concat( + [class_token_broadcast, patches_flattened], 1 + ) positions = tf.range(start=0, limit=self.num_patches + 1, delta=1) if interpolate and None not in ( @@ -154,7 +159,9 @@ def call( encoded = patches_flattened + self.position_embedding(positions) return encoded - def __interpolate_positional_embeddings(self, embedding, height, width, patch_size): + def __interpolate_positional_embeddings( + self, embedding, height, width, patch_size + ): """ Allows for pre-trained position embedding interpolation. This trick allows you to fine-tune a ViT on higher resolution images than it was trained on. 
diff --git a/keras_cv/layers/vit_layers_test.py b/keras_cv/layers/vit_layers_test.py index 5c2158ee19..3d8fd0d3be 100644 --- a/keras_cv/layers/vit_layers_test.py +++ b/keras_cv/layers/vit_layers_test.py @@ -19,7 +19,8 @@ class ViTLayersTest(tf.test.TestCase): def test_patching_wrong_patch_size(self): with self.assertRaisesRegexp( - ValueError, "The patch_size cannot be a negative number. Received -16" + ValueError, + "The patch_size cannot be a negative number. Received -16", ): PatchingAndEmbedding(project_dim=16, patch_size=-16) @@ -28,7 +29,9 @@ def test_patching_wrong_padding(self): ValueError, "Padding must be either 'SAME' or 'VALID', but REFLECT was passed.", ): - PatchingAndEmbedding(project_dim=16, patch_size=16, padding="REFLECT") + PatchingAndEmbedding( + project_dim=16, patch_size=16, padding="REFLECT" + ) def test_patch_embedding_return_type_and_shape(self): layer = PatchingAndEmbedding(project_dim=128, patch_size=16) @@ -68,4 +71,6 @@ def test_patch_embedding_interpolation_numerical(self): positional_embeddings, height=8, width=8, patch_size=2 ) - self.assertTrue(tf.reduce_all(tf.equal(output, tf.ones([1, 16, 4]))).numpy()) + self.assertTrue( + tf.reduce_all(tf.equal(output, tf.ones([1, 16, 4]))).numpy() + ) diff --git a/keras_cv/losses/focal.py b/keras_cv/losses/focal.py index 3e96709f2b..7a6790e6b9 100644 --- a/keras_cv/losses/focal.py +++ b/keras_cv/losses/focal.py @@ -69,7 +69,9 @@ def __init__( self.label_smoothing = label_smoothing def _smooth_labels(self, y_true): - return y_true * (1.0 - self.label_smoothing) + 0.5 * self.label_smoothing + return ( + y_true * (1.0 - self.label_smoothing) + 0.5 * self.label_smoothing + ) def call(self, y_true, y_pred): y_pred = tf.convert_to_tensor(y_pred) diff --git a/keras_cv/losses/focal_test.py b/keras_cv/losses/focal_test.py index 3b6c938a93..77efffd2c7 100644 --- a/keras_cv/losses/focal_test.py +++ b/keras_cv/losses/focal_test.py @@ -23,7 +23,9 @@ def test_output_shape(self): tf.random.uniform(shape=[2, 5], minval=0, maxval=2, dtype=tf.int32), tf.float32, ) - y_pred = tf.random.uniform(shape=[2, 5], minval=0, maxval=1, dtype=tf.float32) + y_pred = tf.random.uniform( + shape=[2, 5], minval=0, maxval=1, dtype=tf.float32 + ) focal_loss = FocalLoss(reduction="sum") @@ -34,7 +36,9 @@ def test_output_shape_reduction_none(self): tf.random.uniform(shape=[2, 5], minval=0, maxval=2, dtype=tf.int32), tf.float32, ) - y_pred = tf.random.uniform(shape=[2, 5], minval=0, maxval=1, dtype=tf.float32) + y_pred = tf.random.uniform( + shape=[2, 5], minval=0, maxval=1, dtype=tf.float32 + ) focal_loss = FocalLoss(reduction="none") diff --git a/keras_cv/losses/giou_loss.py b/keras_cv/losses/giou_loss.py index 8c8d7e756a..1b8d59bec8 100644 --- a/keras_cv/losses/giou_loss.py +++ b/keras_cv/losses/giou_loss.py @@ -123,7 +123,9 @@ def _compute_giou(self, boxes1, boxes2): # giou calculation enclose_area = self._compute_enclosure(boxes1, boxes2) - return iou - tf.math.divide_no_nan((enclose_area - union_area), enclose_area) + return iou - tf.math.divide_no_nan( + (enclose_area - union_area), enclose_area + ) def call(self, y_true, y_pred, sample_weight=None): if sample_weight is not None: diff --git a/keras_cv/losses/giou_loss_test.py b/keras_cv/losses/giou_loss_test.py index 9e83e0c5eb..ce9c4fffea 100644 --- a/keras_cv/losses/giou_loss_test.py +++ b/keras_cv/losses/giou_loss_test.py @@ -19,16 +19,24 @@ class GIoUTest(tf.test.TestCase): def test_output_shape(self): - y_true = tf.random.uniform(shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32) - y_pred = 
tf.random.uniform(shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32) + y_true = tf.random.uniform( + shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32 + ) + y_pred = tf.random.uniform( + shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32 + ) giou_loss = GIoULoss(bounding_box_format="xywh") self.assertAllEqual(giou_loss(y_true, y_pred).shape, ()) def test_output_shape_reduction_none(self): - y_true = tf.random.uniform(shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32) - y_pred = tf.random.uniform(shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32) + y_true = tf.random.uniform( + shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32 + ) + y_pred = tf.random.uniform( + shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32 + ) giou_loss = GIoULoss(bounding_box_format="xywh", reduction="none") diff --git a/keras_cv/losses/iou_loss_test.py b/keras_cv/losses/iou_loss_test.py index e0abea5b4a..1f969c59eb 100644 --- a/keras_cv/losses/iou_loss_test.py +++ b/keras_cv/losses/iou_loss_test.py @@ -19,16 +19,24 @@ class IoUTest(tf.test.TestCase): def test_output_shape(self): - y_true = tf.random.uniform(shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32) - y_pred = tf.random.uniform(shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32) + y_true = tf.random.uniform( + shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32 + ) + y_pred = tf.random.uniform( + shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32 + ) iou_loss = IoULoss(bounding_box_format="xywh") self.assertAllEqual(iou_loss(y_true, y_pred).shape, ()) def test_output_shape_reduction_none(self): - y_true = tf.random.uniform(shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32) - y_pred = tf.random.uniform(shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32) + y_true = tf.random.uniform( + shape=(2, 2, 4), minval=0, maxval=10, dtype=tf.int32 + ) + y_pred = tf.random.uniform( + shape=(2, 2, 4), minval=0, maxval=20, dtype=tf.int32 + ) iou_loss = IoULoss(bounding_box_format="xywh", reduction="none") diff --git a/keras_cv/losses/numerical_tests/focal_loss_numerical_test.py b/keras_cv/losses/numerical_tests/focal_loss_numerical_test.py index ebd323368a..1e2e1d1481 100644 --- a/keras_cv/losses/numerical_tests/focal_loss_numerical_test.py +++ b/keras_cv/losses/numerical_tests/focal_loss_numerical_test.py @@ -41,13 +41,17 @@ def call(self, y_true, y_pred): modulator = tf.pow(1.0 - probs_gt, self._gamma) loss = modulator * cross_entropy weighted_loss = tf.where( - positive_label_mask, self._alpha * loss, (1.0 - self._alpha) * loss + positive_label_mask, + self._alpha * loss, + (1.0 - self._alpha) * loss, ) return weighted_loss -class FocalLossModelGardenComparisonTest(tf.test.TestCase, parameterized.TestCase): +class FocalLossModelGardenComparisonTest( + tf.test.TestCase, parameterized.TestCase +): @parameterized.named_parameters( ("sum", "sum"), ) @@ -65,5 +69,6 @@ def test_model_garden_implementation_has_same_outputs(self, reduction): y_true = tf.cast(y_true, tf.float32) y_pred = tf.random.uniform((200, 10), dtype=tf.float32) self.assertAllClose( - focal_loss(y_true, y_pred), model_garden_focal_loss(y_true, y_pred) + focal_loss(y_true, y_pred), + model_garden_focal_loss(y_true, y_pred), ) diff --git a/keras_cv/losses/penalty_reduced_focal_loss_test.py b/keras_cv/losses/penalty_reduced_focal_loss_test.py index f0cb8470c2..d797dba424 100644 --- a/keras_cv/losses/penalty_reduced_focal_loss_test.py +++ b/keras_cv/losses/penalty_reduced_focal_loss_test.py @@ -24,7 +24,9 @@ def test_output_shape(self): tf.random.uniform(shape=[2, 5], minval=0, 
maxval=2, dtype=tf.int32), tf.float32, ) - y_pred = tf.random.uniform(shape=[2, 5], minval=0, maxval=1, dtype=tf.float32) + y_pred = tf.random.uniform( + shape=[2, 5], minval=0, maxval=1, dtype=tf.float32 + ) focal_loss = BinaryPenaltyReducedFocalCrossEntropy(reduction="sum") @@ -35,7 +37,9 @@ def test_output_shape_reduction_none(self): tf.random.uniform(shape=[2, 5], minval=0, maxval=2, dtype=tf.int32), tf.float32, ) - y_pred = tf.random.uniform(shape=[2, 5], minval=0, maxval=1, dtype=tf.float32) + y_pred = tf.random.uniform( + shape=[2, 5], minval=0, maxval=1, dtype=tf.float32 + ) focal_loss = BinaryPenaltyReducedFocalCrossEntropy(reduction="none") @@ -55,7 +59,9 @@ def test_output_with_pos_label_neg_pred(self): y_pred = tf.constant([np.exp(-1.0)]) focal_loss = BinaryPenaltyReducedFocalCrossEntropy(reduction="sum") # (1-1/e)^2 * log(1/e) - self.assertAllClose(np.square(1 - np.exp(-1.0)), focal_loss(y_true, y_pred)) + self.assertAllClose( + np.square(1 - np.exp(-1.0)), focal_loss(y_true, y_pred) + ) def test_output_with_neg_label_pred(self): y_true = tf.constant([0.0]) @@ -68,12 +74,16 @@ def test_output_with_neg_label_pos_pred(self): y_pred = tf.constant([1.0 - np.exp(-1.0)]) focal_loss = BinaryPenaltyReducedFocalCrossEntropy(reduction="sum") # (1-0)^4 * (1-1/e)^2 * log(1/e) - self.assertAllClose(np.square(1 - np.exp(-1.0)), focal_loss(y_true, y_pred)) + self.assertAllClose( + np.square(1 - np.exp(-1.0)), focal_loss(y_true, y_pred) + ) def test_output_with_weak_label_pos_pred(self): y_true = tf.constant([0.5]) y_pred = tf.constant([1.0 - np.exp(-1.0)]) - focal_loss = BinaryPenaltyReducedFocalCrossEntropy(beta=2.0, reduction="sum") + focal_loss = BinaryPenaltyReducedFocalCrossEntropy( + beta=2.0, reduction="sum" + ) # (1-0.5)^2 * (1-1/e)^2 * log(1/e) self.assertAllClose( 0.25 * np.square(1 - np.exp(-1.0)), focal_loss(y_true, y_pred) diff --git a/keras_cv/losses/serialization_test.py b/keras_cv/losses/serialization_test.py index dcdac018bf..fce759a09a 100644 --- a/keras_cv/losses/serialization_test.py +++ b/keras_cv/losses/serialization_test.py @@ -44,7 +44,9 @@ def test_loss_serialization(self, loss_cls, init_args): reconstructed_loss = loss_cls.from_config(config) self.assertTrue( - test_utils.config_equals(loss.get_config(), reconstructed_loss.get_config()) + test_utils.config_equals( + loss.get_config(), reconstructed_loss.get_config() + ) ) def assertAllInitParametersAreInConfig(self, loss_cls, config): @@ -55,6 +57,8 @@ def assertAllInitParametersAreInConfig(self, loss_cls, config): if v not in excluded_name } - intersection_with_config = {v for v in config.keys() if v in parameter_names} + intersection_with_config = { + v for v in config.keys() if v in parameter_names + } self.assertSetEqual(parameter_names, intersection_with_config) diff --git a/keras_cv/losses/simclr_loss.py b/keras_cv/losses/simclr_loss.py index 2e25da20b7..0107d4b3f4 100644 --- a/keras_cv/losses/simclr_loss.py +++ b/keras_cv/losses/simclr_loss.py @@ -58,18 +58,22 @@ def call(self, projections_1, projections_2): # Compute logits logits_11 = ( - tf.matmul(projections_1, projections_1, transpose_b=True) / self.temperature + tf.matmul(projections_1, projections_1, transpose_b=True) + / self.temperature ) logits_11 = logits_11 - tf.cast(masks * LARGE_NUM, logits_11.dtype) logits_22 = ( - tf.matmul(projections_2, projections_2, transpose_b=True) / self.temperature + tf.matmul(projections_2, projections_2, transpose_b=True) + / self.temperature ) logits_22 = logits_22 - tf.cast(masks * LARGE_NUM, logits_22.dtype) 
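The simclr_loss.py changes above (and the `logits_12` / `logits_21` terms that follow) only re-wrap long lines. For reference, a toy illustration of what the `masks * LARGE_NUM` subtraction accomplishes in the within-view logits, with assumed batch size, projection width, and temperature:

```python
import tensorflow as tf

LARGE_NUM = 1e9  # mirrors the module-level masking constant the loss uses

# Two L2-normalized projections of the same batch (batch of 3, dim 4).
p = tf.math.l2_normalize(tf.random.normal((3, 4)), axis=-1)
temperature = 0.5

# Within-view similarities: the diagonal compares each sample with itself.
logits = tf.matmul(p, p, transpose_b=True) / temperature

# Subtracting LARGE_NUM on the diagonal pushes those entries to ~-1e9, so
# softmax assigns them ~0 probability and a sample can never be chosen as
# its own positive.
masks = tf.one_hot(tf.range(3), 3)
masked = logits - tf.cast(masks * LARGE_NUM, logits.dtype)
print(tf.nn.softmax(masked, axis=-1))  # diagonal entries are ~0

# The cross-view logits (logits_12 / logits_21) are left unmasked, since
# their diagonal entries are exactly the positive pairs.
```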
logits_12 = ( - tf.matmul(projections_1, projections_2, transpose_b=True) / self.temperature + tf.matmul(projections_1, projections_2, transpose_b=True) + / self.temperature ) logits_21 = ( - tf.matmul(projections_2, projections_1, transpose_b=True) / self.temperature + tf.matmul(projections_2, projections_1, transpose_b=True) + / self.temperature ) loss_a = keras.losses.categorical_crossentropy( diff --git a/keras_cv/losses/simclr_loss_test.py b/keras_cv/losses/simclr_loss_test.py index 620d433c76..b293838286 100644 --- a/keras_cv/losses/simclr_loss_test.py +++ b/keras_cv/losses/simclr_loss_test.py @@ -40,7 +40,9 @@ def test_output_shape_reduction_none(self): simclr_loss = SimCLRLoss(temperature=1, reduction="none") - self.assertAllEqual(simclr_loss(projections_1, projections_2).shape, (10,)) + self.assertAllEqual( + simclr_loss(projections_1, projections_2).shape, (10,) + ) def test_output_value(self): projections_1 = [ diff --git a/keras_cv/metrics/__init__.py b/keras_cv/metrics/__init__.py index 3297ac328e..13951e07bd 100644 --- a/keras_cv/metrics/__init__.py +++ b/keras_cv/metrics/__init__.py @@ -12,5 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from keras_cv.metrics.coco.mean_average_precision import _COCOMeanAveragePrecision +from keras_cv.metrics.coco.mean_average_precision import ( + _COCOMeanAveragePrecision, +) from keras_cv.metrics.coco.recall import _COCORecall diff --git a/keras_cv/metrics/coco/__init__.py b/keras_cv/metrics/coco/__init__.py index 8caebfa46b..ce3fa3333d 100644 --- a/keras_cv/metrics/coco/__init__.py +++ b/keras_cv/metrics/coco/__init__.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from keras_cv.metrics.coco.mean_average_precision import _COCOMeanAveragePrecision +from keras_cv.metrics.coco.mean_average_precision import ( + _COCOMeanAveragePrecision, +) try: from keras_cv.metrics.coco.pycoco_wrapper import PyCOCOWrapper diff --git a/keras_cv/metrics/coco/mean_average_precision.py b/keras_cv/metrics/coco/mean_average_precision.py index 6c2a8be251..78b46e9140 100644 --- a/keras_cv/metrics/coco/mean_average_precision.py +++ b/keras_cv/metrics/coco/mean_average_precision.py @@ -103,11 +103,15 @@ def __init__( super().__init__(**kwargs) # Initialize parameter values self.bounding_box_format = bounding_box_format - self.iou_thresholds = iou_thresholds or [x / 100.0 for x in range(50, 100, 5)] + self.iou_thresholds = iou_thresholds or [ + x / 100.0 for x in range(50, 100, 5) + ] self.area_range = area_range self.max_detections = max_detections self.class_ids = list(class_ids) - self.recall_thresholds = recall_thresholds or [x / 100 for x in range(0, 101)] + self.recall_thresholds = recall_thresholds or [ + x / 100 for x in range(0, 101) + ] self.num_buckets = num_buckets self.num_iou_thresholds = len(self.iou_thresholds) @@ -146,8 +150,12 @@ def __init__( ) def reset_state(self): - self.true_positive_buckets.assign(tf.zeros_like(self.true_positive_buckets)) - self.false_positive_buckets.assign(tf.zeros_like(self.false_positive_buckets)) + self.true_positive_buckets.assign( + tf.zeros_like(self.true_positive_buckets) + ) + self.false_positive_buckets.assign( + tf.zeros_like(self.false_positive_buckets) + ) self.ground_truths.assign(tf.zeros_like(self.ground_truths)) @tf.function() @@ -178,15 +186,21 @@ def update_state(self, y_true, y_pred, sample_weight=None): ) class_ids = tf.constant(self.class_ids, dtype=self.compute_dtype) - iou_thresholds = tf.constant(self.iou_thresholds, dtype=self.compute_dtype) + iou_thresholds = tf.constant( + self.iou_thresholds, dtype=self.compute_dtype + ) num_images = tf.shape(y_true)[0] - y_pred = utils.sort_bounding_boxes(y_pred, axis=bounding_box.XYXY.CONFIDENCE) + y_pred = utils.sort_bounding_boxes( + y_pred, axis=bounding_box.XYXY.CONFIDENCE + ) ground_truth_boxes_update = tf.zeros_like(self.ground_truths) true_positive_buckets_update = tf.zeros_like(self.true_positive_buckets) - false_positive_buckets_update = tf.zeros_like(self.false_positive_buckets) + false_positive_buckets_update = tf.zeros_like( + self.false_positive_buckets + ) for img in tf.range(num_images): ground_truths = utils.filter_out_sentinels(y_true[img]) @@ -209,12 +223,16 @@ def update_state(self, y_true, y_pred, sample_weight=None): false_positives_update = tf.TensorArray( tf.int32, size=self.num_class_ids * self.num_iou_thresholds ) - ground_truths_update = tf.TensorArray(tf.int32, size=self.num_class_ids) + ground_truths_update = tf.TensorArray( + tf.int32, size=self.num_class_ids + ) for c_i in tf.range(self.num_class_ids): category_id = class_ids[c_i] ground_truths_by_category = utils.filter_boxes( - ground_truths, value=category_id, axis=bounding_box.XYXY.CLASS + ground_truths, + value=category_id, + axis=bounding_box.XYXY.CLASS, ) detections_by_category = utils.filter_boxes( detections, value=category_id, axis=bounding_box.XYXY.CLASS @@ -236,7 +254,9 @@ def update_state(self, y_true, y_pred, sample_weight=None): iou_threshold = iou_thresholds[iou_i] pred_matches = utils.match_boxes(ious, iou_threshold) - dt_scores = detections_by_category[:, bounding_box.XYXY.CONFIDENCE] + dt_scores = detections_by_category[ + :, bounding_box.XYXY.CONFIDENCE + ] true_positives = 
pred_matches != -1 false_positives = pred_matches == -1 @@ -244,7 +264,9 @@ def update_state(self, y_true, y_pred, sample_weight=None): dt_scores_clipped = tf.clip_by_value(dt_scores, 0.0, 1.0) # We must divide by 1.01 to prevent off by one errors. confidence_buckets = tf.cast( - tf.math.floor(self.num_buckets * (dt_scores_clipped / 1.01)), + tf.math.floor( + self.num_buckets * (dt_scores_clipped / 1.01) + ), tf.int32, ) true_positives_by_bucket = tf.gather_nd( @@ -306,7 +328,9 @@ def result(self): true_positives_sum = tf.cumsum(true_positives, axis=-1) false_positives_sum = tf.cumsum(false_positives, axis=-1) - present_categories = tf.math.reduce_sum(tf.cast(ground_truths != 0, tf.int32)) + present_categories = tf.math.reduce_sum( + tf.cast(ground_truths != 0, tf.int32) + ) if present_categories == 0: return 0.0 @@ -314,7 +338,9 @@ def result(self): recalls = tf.math.divide_no_nan( true_positives_sum, ground_truths[:, None, None] ) - precisions = true_positives_sum / (false_positives_sum + true_positives_sum) + precisions = true_positives_sum / ( + false_positives_sum + true_positives_sum + ) result = tf.TensorArray( tf.float32, size=self.num_class_ids * self.num_iou_thresholds @@ -348,8 +374,12 @@ def result(self): precisions_i = tf.concat([precisions_i, zero_pad], axis=-1) precision_per_recall_threshold = tf.gather(precisions_i, inds) - result_ij = tf.math.reduce_mean(precision_per_recall_threshold, axis=-1) - result = result.write(j + i * self.num_iou_thresholds, result_ij) + result_ij = tf.math.reduce_mean( + precision_per_recall_threshold, axis=-1 + ) + result = result.write( + j + i * self.num_iou_thresholds, result_ij + ) result = tf.reshape( result.stack(), (self.num_class_ids, self.num_iou_thresholds) diff --git a/keras_cv/metrics/coco/mean_average_precision_test.py b/keras_cv/metrics/coco/mean_average_precision_test.py index aa56260f9d..64ab9b66df 100644 --- a/keras_cv/metrics/coco/mean_average_precision_test.py +++ b/keras_cv/metrics/coco/mean_average_precision_test.py @@ -34,10 +34,12 @@ def DISABLE_test_runs_inside_model(self): ) # These would match if they were in the area range - y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype(np.float32) - y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.5]]]).astype( + y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype( np.float32 ) + y_pred = np.array( + [[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.5]]] + ).astype(np.float32) model.compile(metrics=[mean_average_precision]) @@ -206,7 +208,8 @@ def DISABLE_test_counting_with_missing_class_present_in_data(self): dtype=tf.float64, ) y_pred = tf.constant( - [[[0, 50, 100, 150, 1, 1.0], [0, 50, 100, 150, 33, 1.0]]], dtype=tf.float32 + [[[0, 50, 100, 150, 1, 1.0], [0, 50, 100, 150, 33, 1.0]]], + dtype=tf.float32, ) metric = _COCOMeanAveragePrecision( diff --git a/keras_cv/metrics/coco/numerical_tests/GenerateSamples.ipynb b/keras_cv/metrics/coco/numerical_tests/GenerateSamples.ipynb index 94bece8d5e..445583daad 100644 --- a/keras_cv/metrics/coco/numerical_tests/GenerateSamples.ipynb +++ b/keras_cv/metrics/coco/numerical_tests/GenerateSamples.ipynb @@ -192,7 +192,9 @@ "\n", "\n", "results = []\n", - "for gt in [gt for gt in ground_truths[\"annotations\"] if gt[\"image_id\"] in img_ids]:\n", + "for gt in [\n", + " gt for gt in ground_truths[\"annotations\"] if gt[\"image_id\"] in img_ids\n", + "]:\n", " result = {\n", " \"image_id\": gt[\"image_id\"],\n", " \"area\": gt[\"area\"],\n", @@ -316,7 +318,9 @@ " y, x = a.shape\n", " y_pad = y_ - y\n", 
" x_pad = x_ - x\n", - " return np.pad(a, ((0, y_pad), (0, x_pad)), mode=\"constant\", constant_values=-1)\n", + " return np.pad(\n", + " a, ((0, y_pad), (0, x_pad)), mode=\"constant\", constant_values=-1\n", + " )\n", "\n", "\n", "result = [to_shape(r, (m, 5)) for r in result]\n", @@ -449,7 +453,9 @@ " y, x = a.shape\n", " y_pad = y_ - y\n", " x_pad = x_ - x\n", - " return np.pad(a, ((0, y_pad), (0, x_pad)), mode=\"constant\", constant_values=-1)\n", + " return np.pad(\n", + " a, ((0, y_pad), (0, x_pad)), mode=\"constant\", constant_values=-1\n", + " )\n", "\n", "\n", "result = [to_shape(r, (m, 6)) for r in result]\n", diff --git a/keras_cv/metrics/coco/pycoco_wrapper.py b/keras_cv/metrics/coco/pycoco_wrapper.py index 6b1e55bee9..4da022405c 100644 --- a/keras_cv/metrics/coco/pycoco_wrapper.py +++ b/keras_cv/metrics/coco/pycoco_wrapper.py @@ -78,7 +78,9 @@ def loadRes(self, predictions): image_ids = [ann["image_id"] for ann in predictions] if set(image_ids) != (set(image_ids) & set(self.getImgIds())): - raise ValueError("Results do not correspond to the current dataset!") + raise ValueError( + "Results do not correspond to the current dataset!" + ) for ann in predictions: x1, x2, y1, y2 = [ ann["bbox"][0], @@ -105,7 +107,9 @@ def _yxyx_to_xywh(boxes): boxes_xmin = boxes[..., 1] boxes_width = boxes[..., 3] - boxes[..., 1] boxes_height = boxes[..., 2] - boxes[..., 0] - new_boxes = np.stack([boxes_xmin, boxes_ymin, boxes_width, boxes_height], axis=-1) + new_boxes = np.stack( + [boxes_xmin, boxes_ymin, boxes_width, boxes_height], axis=-1 + ) return new_boxes diff --git a/keras_cv/metrics/coco/recall.py b/keras_cv/metrics/coco/recall.py index c97376b8b0..ed78225e82 100644 --- a/keras_cv/metrics/coco/recall.py +++ b/keras_cv/metrics/coco/recall.py @@ -92,7 +92,9 @@ def __init__( super().__init__(**kwargs) # Initialize parameter values self.bounding_box_format = bounding_box_format - iou_thresholds = iou_thresholds or [x / 100.0 for x in range(50, 100, 5)] + iou_thresholds = iou_thresholds or [ + x / 100.0 for x in range(50, 100, 5) + ] self.iou_thresholds = iou_thresholds self.class_ids = list(class_ids) @@ -161,7 +163,9 @@ def update_state(self, y_true, y_pred, sample_weight=None): dtype=self.compute_dtype, ) - y_pred = utils.sort_bounding_boxes(y_pred, axis=bounding_box.XYXY.CONFIDENCE) + y_pred = utils.sort_bounding_boxes( + y_pred, axis=bounding_box.XYXY.CONFIDENCE + ) num_images = tf.shape(y_true)[0] @@ -190,7 +194,9 @@ def update_state(self, y_true, y_pred, sample_weight=None): category = class_ids[k_i] category_filtered_y_pred = utils.filter_boxes( - y_pred_for_image, value=category, axis=bounding_box.XYXY.CLASS + y_pred_for_image, + value=category, + axis=bounding_box.XYXY.CLASS, ) detections = category_filtered_y_pred @@ -198,7 +204,9 @@ def update_state(self, y_true, y_pred, sample_weight=None): detections = category_filtered_y_pred[: self.max_detections] ground_truths = utils.filter_boxes( - y_true_for_image, value=category, axis=bounding_box.XYXY.CLASS + y_true_for_image, + value=category, + axis=bounding_box.XYXY.CLASS, ) ious = iou_lib.compute_iou(ground_truths, detections, "yxyx") @@ -209,7 +217,9 @@ def update_state(self, y_true, y_pred, sample_weight=None): indices = [t_i, k_i] true_positives = tf.cast(pred_matches != -1, tf.int32) - true_positives_sum = tf.math.reduce_sum(true_positives, axis=-1) + true_positives_sum = tf.math.reduce_sum( + true_positives, axis=-1 + ) true_positives_update = tf.tensor_scatter_nd_add( true_positives_update, [indices], [true_positives_sum] @@ 
-238,7 +248,9 @@ def result(self): true_positives = tf.cast(self.true_positives, self.dtype) ground_truth_boxes = tf.cast(self.ground_truth_boxes, self.dtype) - recalls = tf.math.divide_no_nan(true_positives, ground_truth_boxes[None, :]) + recalls = tf.math.divide_no_nan( + true_positives, ground_truth_boxes[None, :] + ) recalls_per_threshold = ( tf.math.reduce_sum(recalls, axis=-1) / n_present_categories ) diff --git a/keras_cv/metrics/coco/recall_test.py b/keras_cv/metrics/coco/recall_test.py index 4d84bc8d01..c6ab5b842b 100644 --- a/keras_cv/metrics/coco/recall_test.py +++ b/keras_cv/metrics/coco/recall_test.py @@ -33,10 +33,12 @@ def DISABLE_test_runs_inside_model(self): ) # These would match if they were in the area range - y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype(np.float32) - y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]]).astype( + y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype( np.float32 ) + y_pred = np.array( + [[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]] + ).astype(np.float32) model.compile(metrics=[recall]) model.evaluate(y_pred, y_true) @@ -73,7 +75,9 @@ def DISABLE_test_ragged_tensor_support(self): def DISABLE_test_merge_state(self): y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32) y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32) - y_pred_match = tf.constant([[[0, 0, 100, 100, 1, 1.0]]], dtype=tf.float32) + y_pred_match = tf.constant( + [[[0, 0, 100, 100, 1, 1.0]]], dtype=tf.float32 + ) m1 = _COCORecall( bounding_box_format="xyxy", @@ -117,10 +121,12 @@ def DISABLE_test_recall_area_range_filtering(self): ) # These would match if they were in the area range - y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype(np.float32) - y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]]).astype( + y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype( np.float32 ) + y_pred = np.array( + [[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]] + ).astype(np.float32) recall.update_state(y_true, y_pred) self.assertAllEqual(recall.result(), 0.0) @@ -173,7 +179,13 @@ def DISABLE_test_max_detections_one_third(self): area_range=(0, 1e9**2), ) y_true = np.array( - [[[0, 0, 100, 100, 1], [100, 100, 200, 200, 1], [300, 300, 400, 400, 1]]] + [ + [ + [0, 0, 100, 100, 1], + [100, 100, 200, 200, 1], + [300, 300, 400, 400, 1], + ] + ] ).astype(np.float32) y_pred = np.concatenate([y_true, np.ones((1, 3, 1))], axis=-1).astype( np.float32 @@ -191,7 +203,13 @@ def DISABLE_test_max_detections(self): area_range=(0, 1e9**2), ) y_true = np.array( - [[[0, 0, 100, 100, 1], [100, 100, 200, 200, 1], [300, 300, 400, 400, 1]]] + [ + [ + [0, 0, 100, 100, 1], + [100, 100, 200, 200, 1], + [300, 300, 400, 400, 1], + ] + ] ).astype(np.float32) y_pred = np.concatenate([y_true, np.ones((1, 3, 1))], axis=-1).astype( np.float32 diff --git a/keras_cv/metrics/coco/utils.py b/keras_cv/metrics/coco/utils.py index b564072810..0f657d7769 100644 --- a/keras_cv/metrics/coco/utils.py +++ b/keras_cv/metrics/coco/utils.py @@ -72,7 +72,9 @@ def filter_out_sentinels(boxes): Returns: boxes: A new Tensor of bounding boxes, where boxes[axis]!=-1. """ - return tf.gather_nd(boxes, tf.where(boxes[:, bounding_box.XYXY.CLASS] != -1)) + return tf.gather_nd( + boxes, tf.where(boxes[:, bounding_box.XYXY.CLASS] != -1) + ) def sort_bounding_boxes(boxes, axis=5): @@ -87,7 +89,9 @@ def sort_bounding_boxes(boxes, axis=5): boxes: A new Tensor of Bounding boxes, sorted on an image-wise basis. 
""" num_images = tf.shape(boxes)[0] - boxes_sorted_list = tf.TensorArray(tf.float32, size=num_images, dynamic_size=False) + boxes_sorted_list = tf.TensorArray( + tf.float32, size=num_images, dynamic_size=False + ) for img in tf.range(num_images): preds_for_img = boxes[img, :, :] prediction_scores = preds_for_img[:, axis] diff --git a/keras_cv/metrics/coco/utils_test.py b/keras_cv/metrics/coco/utils_test.py index 2470711e27..e17acdbc55 100644 --- a/keras_cv/metrics/coco/utils_test.py +++ b/keras_cv/metrics/coco/utils_test.py @@ -46,7 +46,9 @@ def DISABLE_test_to_sentinel_padded_bounding_box_tensor(self): box_set1 = tf.stack([_dummy_bounding_box(), _dummy_bounding_box()]) box_set2 = tf.stack([_dummy_bounding_box()]) boxes = [box_set1, box_set2] - bounding_box_tensor = utils.to_sentinel_padded_bounding_box_tensor(boxes) + bounding_box_tensor = utils.to_sentinel_padded_bounding_box_tensor( + boxes + ) self.assertAllClose( bounding_box_tensor[1, 1], -tf.ones( @@ -67,7 +69,9 @@ def DISABLE_test_end_to_end_sentinel_filtering(self): box_set1 = tf.stack([_dummy_bounding_box(), _dummy_bounding_box()]) box_set2 = tf.stack([_dummy_bounding_box()]) boxes = [box_set1, box_set2] - bounding_box_tensor = utils.to_sentinel_padded_bounding_box_tensor(boxes) + bounding_box_tensor = utils.to_sentinel_padded_bounding_box_tensor( + boxes + ) self.assertAllClose( utils.filter_out_sentinels(bounding_box_tensor[0]), box_set1 @@ -119,7 +123,9 @@ def DISABLE_test_sort_bounding_boxes_unsorted_list(self): ), axis=0, ) - y_sorted = utils.sort_bounding_boxes(y_pred, bounding_box.XYXY.CONFIDENCE) + y_sorted = utils.sort_bounding_boxes( + y_pred, bounding_box.XYXY.CONFIDENCE + ) self.assertAllClose(y_sorted, want) def DISABLE_test_sort_bounding_boxes_empty_list(self): diff --git a/keras_cv/models/__init__.py b/keras_cv/models/__init__.py index 9cdc186407..0555f0a0cc 100644 --- a/keras_cv/models/__init__.py +++ b/keras_cv/models/__init__.py @@ -59,7 +59,9 @@ from keras_cv.models.mobilenet_v3 import MobileNetV3Small from keras_cv.models.object_detection.faster_rcnn import FasterRCNN from keras_cv.models.object_detection.retina_net.retina_net import RetinaNet -from keras_cv.models.object_detection_3d.center_pillar import MultiHeadCenterPillar +from keras_cv.models.object_detection_3d.center_pillar import ( + MultiHeadCenterPillar, +) from keras_cv.models.regnet import RegNetX002 from keras_cv.models.regnet import RegNetX004 from keras_cv.models.regnet import RegNetX006 diff --git a/keras_cv/models/__internal__/__init__.py b/keras_cv/models/__internal__/__init__.py index a0471c61f6..8baa01b8fe 100644 --- a/keras_cv/models/__internal__/__init__.py +++ b/keras_cv/models/__internal__/__init__.py @@ -17,4 +17,6 @@ from keras_cv.models.__internal__.darknet_utils import DarknetConvBlockDepthwise from keras_cv.models.__internal__.darknet_utils import Focus from keras_cv.models.__internal__.darknet_utils import ResidualBlocks -from keras_cv.models.__internal__.darknet_utils import SpatialPyramidPoolingBottleneck +from keras_cv.models.__internal__.darknet_utils import ( + SpatialPyramidPoolingBottleneck, +) diff --git a/keras_cv/models/__internal__/darknet_utils.py b/keras_cv/models/__internal__/darknet_utils.py index e2c4fefb0b..007b218d5b 100644 --- a/keras_cv/models/__internal__/darknet_utils.py +++ b/keras_cv/models/__internal__/darknet_utils.py @@ -123,7 +123,11 @@ def apply(x): def SpatialPyramidPoolingBottleneck( - filters, hidden_filters=None, kernel_sizes=(5, 9, 13), activation="silu", name=None + filters, + 
hidden_filters=None, + kernel_sizes=(5, 9, 13), + activation="silu", + name=None, ): """Spatial pyramid pooling layer used in YOLOv3-SPP @@ -205,7 +209,9 @@ def DarknetConvBlockDepthwise( name = f"conv_block{backend.get_uid('conv_block')}" model_layers = [ - layers.DepthwiseConv2D(kernel_size, strides, padding="same", use_bias=False), + layers.DepthwiseConv2D( + kernel_size, strides, padding="same", use_bias=False + ), layers.BatchNormalization(), ] @@ -217,7 +223,9 @@ def DarknetConvBlockDepthwise( model_layers.append(layers.LeakyReLU(0.1)) model_layers.append( - DarknetConvBlock(filters, kernel_size=1, strides=1, activation=activation) + DarknetConvBlock( + filters, kernel_size=1, strides=1, activation=activation + ) ) return keras.Sequential(model_layers, name=name) @@ -258,7 +266,9 @@ def __init__( self.activation = activation hidden_channels = filters // 2 - ConvBlock = DarknetConvBlockDepthwise if use_depthwise else DarknetConvBlock + ConvBlock = ( + DarknetConvBlockDepthwise if use_depthwise else DarknetConvBlock + ) self.darknet_conv1 = DarknetConvBlock( hidden_channels, diff --git a/keras_cv/models/convmixer.py b/keras_cv/models/convmixer.py index 94f0d8b202..38c5c72b01 100644 --- a/keras_cv/models/convmixer.py +++ b/keras_cv/models/convmixer.py @@ -27,7 +27,12 @@ from keras_cv.models.weights import parse_weights MODEL_CONFIGS = { - "ConvMixer_1536_20": {"dim": 1536, "depth": 20, "patch_size": 7, "kernel_size": 9}, + "ConvMixer_1536_20": { + "dim": 1536, + "depth": 20, + "patch_size": 7, + "kernel_size": 9, + }, "ConvMixer_1536_24": { "dim": 1536, "depth": 24, @@ -122,7 +127,9 @@ def PatchEmbed(dim, patch_size): """ def apply(x): - x = layers.Conv2D(filters=dim, kernel_size=patch_size, strides=patch_size)(x) + x = layers.Conv2D( + filters=dim, kernel_size=patch_size, strides=patch_size + )(x) x = tf.nn.gelu(x) x = layers.BatchNormalization()(x) return x @@ -212,9 +219,9 @@ def ConvMixer( if include_top: x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - x = layers.Dense(classes, activation=classifier_activation, name="predictions")( - x - ) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) else: if pooling == "avg": x = layers.GlobalAveragePooling2D(name="avg_pool")(x) @@ -378,8 +385,24 @@ def ConvMixer_512_16( ) -setattr(ConvMixer_1536_20, "__doc__", BASE_DOCSTRING.format(name="ConvMixer_1536_20")) -setattr(ConvMixer_1536_24, "__doc__", BASE_DOCSTRING.format(name="ConvMixer_1536_24")) -setattr(ConvMixer_768_32, "__doc__", BASE_DOCSTRING.format(name="ConvMixer_768_32")) -setattr(ConvMixer_1024_16, "__doc__", BASE_DOCSTRING.format(name="ConvMixer_1024_16")) -setattr(ConvMixer_512_16, "__doc__", BASE_DOCSTRING.format(name="ConvMixer_512_16")) +setattr( + ConvMixer_1536_20, + "__doc__", + BASE_DOCSTRING.format(name="ConvMixer_1536_20"), +) +setattr( + ConvMixer_1536_24, + "__doc__", + BASE_DOCSTRING.format(name="ConvMixer_1536_24"), +) +setattr( + ConvMixer_768_32, "__doc__", BASE_DOCSTRING.format(name="ConvMixer_768_32") +) +setattr( + ConvMixer_1024_16, + "__doc__", + BASE_DOCSTRING.format(name="ConvMixer_1024_16"), +) +setattr( + ConvMixer_512_16, "__doc__", BASE_DOCSTRING.format(name="ConvMixer_512_16") +) diff --git a/keras_cv/models/convnext.py b/keras_cv/models/convnext.py index e226b87ae8..3909531d43 100644 --- a/keras_cv/models/convnext.py +++ b/keras_cv/models/convnext.py @@ -121,7 +121,9 @@ def __init__(self, init_values, projection_dim, **kwargs): self.projection_dim = projection_dim def build(self, input_shape): - self.gamma 
= tf.Variable(self.init_values * tf.ones((self.projection_dim,))) + self.gamma = tf.Variable( + self.init_values * tf.ones((self.projection_dim,)) + ) def call(self, x): return x * self.gamma @@ -188,7 +190,9 @@ def apply(inputs): name=name + "_layer_scale", )(x) if drop_path_rate: - layer = StochasticDepth(drop_path_rate, name=name + "_stochastic_depth") + layer = StochasticDepth( + drop_path_rate, name=name + "_stochastic_depth" + ) return layer([inputs, x]) else: layer = layers.Activation("linear", name=name + "_identity") @@ -213,10 +217,12 @@ def Head(num_classes, activation="softmax", name=None): def apply(x): x = layers.GlobalAveragePooling2D(name=name + "_head_gap")(x) - x = layers.LayerNormalization(epsilon=1e-6, name=name + "_head_layernorm")(x) - x = layers.Dense(num_classes, activation=activation, name=name + "_head_dense")( - x - ) + x = layers.LayerNormalization( + epsilon=1e-6, name=name + "_head_layernorm" + )(x) + x = layers.Dense( + num_classes, activation=activation, name=name + "_head_dense" + )(x) return x return apply @@ -315,7 +321,9 @@ def ConvNeXt( strides=4, name=name + "_stem_conv", ), - layers.LayerNormalization(epsilon=1e-6, name=name + "_stem_layernorm"), + layers.LayerNormalization( + epsilon=1e-6, name=name + "_stem_layernorm" + ), ], name=name + "_stem", ) @@ -346,7 +354,9 @@ def ConvNeXt( # Stochastic depth schedule. # This is referred from the original ConvNeXt codebase: # https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py#L86 - depth_drop_rates = [float(x) for x in tf.linspace(0.0, drop_path_rate, sum(depths))] + depth_drop_rates = [ + float(x) for x in tf.linspace(0.0, drop_path_rate, sum(depths)) + ] # First apply downsampling blocks and then apply ConvNeXt stages. cur = 0 diff --git a/keras_cv/models/csp_darknet.py b/keras_cv/models/csp_darknet.py index 2a6c203aeb..f7c09004c6 100644 --- a/keras_cv/models/csp_darknet.py +++ b/keras_cv/models/csp_darknet.py @@ -30,7 +30,9 @@ from keras_cv.models.__internal__.darknet_utils import DarknetConvBlock from keras_cv.models.__internal__.darknet_utils import DarknetConvBlockDepthwise from keras_cv.models.__internal__.darknet_utils import Focus -from keras_cv.models.__internal__.darknet_utils import SpatialPyramidPoolingBottleneck +from keras_cv.models.__internal__.darknet_utils import ( + SpatialPyramidPoolingBottleneck, +) from keras_cv.models.weights import parse_weights @@ -122,11 +124,15 @@ def CSPDarkNet( # stem x = Focus(name="stem_focus")(x) - x = DarknetConvBlock(base_channels, kernel_size=3, strides=1, name="stem_conv")(x) + x = DarknetConvBlock( + base_channels, kernel_size=3, strides=1, name="stem_conv" + )(x) _backbone_level_outputs = {} # dark2 - x = ConvBlock(base_channels * 2, kernel_size=3, strides=2, name="dark2_conv")(x) + x = ConvBlock( + base_channels * 2, kernel_size=3, strides=2, name="dark2_conv" + )(x) x = CrossStagePartial( base_channels * 2, num_bottlenecks=base_depth, @@ -136,7 +142,9 @@ def CSPDarkNet( _backbone_level_outputs[2] = x # dark3 - x = ConvBlock(base_channels * 4, kernel_size=3, strides=2, name="dark3_conv")(x) + x = ConvBlock( + base_channels * 4, kernel_size=3, strides=2, name="dark3_conv" + )(x) x = CrossStagePartial( base_channels * 4, num_bottlenecks=base_depth * 3, @@ -146,7 +154,9 @@ def CSPDarkNet( _backbone_level_outputs[3] = x # dark4 - x = ConvBlock(base_channels * 8, kernel_size=3, strides=2, name="dark4_conv")(x) + x = ConvBlock( + base_channels * 8, kernel_size=3, strides=2, name="dark4_conv" + )(x) x = CrossStagePartial( base_channels * 
8, num_bottlenecks=base_depth * 3, @@ -156,7 +166,9 @@ def CSPDarkNet( _backbone_level_outputs[4] = x # dark5 - x = ConvBlock(base_channels * 16, kernel_size=3, strides=2, name="dark5_conv")(x) + x = ConvBlock( + base_channels * 16, kernel_size=3, strides=2, name="dark5_conv" + )(x) x = SpatialPyramidPoolingBottleneck( base_channels * 16, hidden_filters=base_channels * 8, name="dark5_spp" )(x) @@ -171,9 +183,9 @@ def CSPDarkNet( if include_top: x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - x = layers.Dense(classes, activation=classifier_activation, name="predictions")( - x - ) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) elif pooling == "avg": x = layers.GlobalAveragePooling2D(name="avg_pool")(x) elif pooling == "max": diff --git a/keras_cv/models/darknet.py b/keras_cv/models/darknet.py index dfba3d17df..bf5ea5c115 100644 --- a/keras_cv/models/darknet.py +++ b/keras_cv/models/darknet.py @@ -25,7 +25,9 @@ from keras_cv.models import utils from keras_cv.models.__internal__.darknet_utils import DarknetConvBlock from keras_cv.models.__internal__.darknet_utils import ResidualBlocks -from keras_cv.models.__internal__.darknet_utils import SpatialPyramidPoolingBottleneck +from keras_cv.models.__internal__.darknet_utils import ( + SpatialPyramidPoolingBottleneck, +) from keras_cv.models.weights import parse_weights BASE_DOCSTRING = """Instantiates the {name} architecture. @@ -142,7 +144,11 @@ def DarkNet( # stem x = DarknetConvBlock( - filters=32, kernel_size=3, strides=1, activation="leaky_relu", name="stem_conv" + filters=32, + kernel_size=3, + strides=1, + activation="leaky_relu", + name="stem_conv", )(x) x = ResidualBlocks(filters=64, num_blocks=1, name="stem_residual_block")(x) @@ -154,7 +160,9 @@ def DarkNet( for filter, block in zip(filters, blocks): x = ResidualBlocks( - filters=filter, num_blocks=block, name=f"dark{layer_num}_residual_block" + filters=filter, + num_blocks=block, + name=f"dark{layer_num}_residual_block", )(x) layer_num += 1 @@ -173,9 +181,9 @@ def DarkNet( activation="leaky_relu", name="dark5_conv2", )(x) - x = SpatialPyramidPoolingBottleneck(512, activation="leaky_relu", name="dark5_spp")( - x - ) + x = SpatialPyramidPoolingBottleneck( + 512, activation="leaky_relu", name="dark5_spp" + )(x) x = DarknetConvBlock( filters=1024, kernel_size=3, @@ -193,9 +201,9 @@ def DarkNet( if include_top: x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - x = layers.Dense(classes, activation=classifier_activation, name="predictions")( - x - ) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) elif pooling == "avg": x = layers.GlobalAveragePooling2D(name="avg_pool")(x) elif pooling == "max": diff --git a/keras_cv/models/densenet.py b/keras_cv/models/densenet.py index 77eb1a247e..a3085b63cd 100644 --- a/keras_cv/models/densenet.py +++ b/keras_cv/models/densenet.py @@ -135,15 +135,19 @@ def apply(x): axis=BN_AXIS, epsilon=1.001e-5, name=f"{name}_0_bn" )(x) x1 = layers.Activation("relu", name=f"{name}_0_relu")(x1) - x1 = layers.Conv2D(4 * growth_rate, 1, use_bias=False, name=f"{name}_1_conv")( - x1 - ) + x1 = layers.Conv2D( + 4 * growth_rate, 1, use_bias=False, name=f"{name}_1_conv" + )(x1) x1 = layers.BatchNormalization( axis=BN_AXIS, epsilon=1.001e-5, name=f"{name}_1_bn" )(x1) x1 = layers.Activation("relu", name=f"{name}_1_relu")(x1) x1 = layers.Conv2D( - growth_rate, 3, padding="same", use_bias=False, name=f"{name}_2_conv" + growth_rate, + 3, + padding="same", + use_bias=False, + 
name=f"{name}_2_conv", )(x1) x = layers.Concatenate(axis=BN_AXIS, name=f"{name}_concat")([x, x1]) return x @@ -219,7 +223,9 @@ def DenseNet( x = layers.Conv2D( 64, 7, strides=2, use_bias=False, padding="same", name="conv1/conv" )(x) - x = layers.BatchNormalization(axis=BN_AXIS, epsilon=1.001e-5, name="conv1/bn")(x) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=1.001e-5, name="conv1/bn" + )(x) x = layers.Activation("relu", name="conv1/relu")(x) x = layers.MaxPooling2D(3, strides=2, padding="same", name="pool1")(x) @@ -236,9 +242,9 @@ def DenseNet( if include_top: x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - x = layers.Dense(classes, activation=classifier_activation, name="predictions")( - x - ) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) elif pooling == "avg": x = layers.GlobalAveragePooling2D(name="avg_pool")(x) elif pooling == "max": diff --git a/keras_cv/models/efficientnet_lite.py b/keras_cv/models/efficientnet_lite.py index 79cb968039..7621be6252 100644 --- a/keras_cv/models/efficientnet_lite.py +++ b/keras_cv/models/efficientnet_lite.py @@ -250,8 +250,12 @@ def apply(inputs): kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + "expand_conv", )(inputs) - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "expand_bn")(x) - x = layers.Activation(activation, name=name + "expand_activation")(x) + x = layers.BatchNormalization( + axis=BN_AXIS, name=name + "expand_bn" + )(x) + x = layers.Activation(activation, name=name + "expand_activation")( + x + ) else: x = inputs diff --git a/keras_cv/models/efficientnet_lite_test.py b/keras_cv/models/efficientnet_lite_test.py index 6f0e3e23b9..0c65161455 100644 --- a/keras_cv/models/efficientnet_lite_test.py +++ b/keras_cv/models/efficientnet_lite_test.py @@ -33,7 +33,9 @@ """ -class EfficientNetLiteTest(ModelsTest, tf.test.TestCase, parameterized.TestCase): +class EfficientNetLiteTest( + ModelsTest, tf.test.TestCase, parameterized.TestCase +): @parameterized.parameters(*MODEL_LIST) def test_application_base(self, app, _, args): super()._test_application_base(app, _, args) diff --git a/keras_cv/models/efficientnet_v1.py b/keras_cv/models/efficientnet_v1.py index bcf1b1a78f..4082eb2a1c 100644 --- a/keras_cv/models/efficientnet_v1.py +++ b/keras_cv/models/efficientnet_v1.py @@ -244,8 +244,12 @@ def apply(inputs): kernel_initializer=CONV_KERNEL_INITIALIZER, name=name + "expand_conv", )(inputs) - x = layers.BatchNormalization(axis=BN_AXIS, name=name + "expand_bn")(x) - x = layers.Activation(activation, name=name + "expand_activation")(x) + x = layers.BatchNormalization( + axis=BN_AXIS, name=name + "expand_bn" + )(x) + x = layers.Activation(activation, name=name + "expand_activation")( + x + ) else: x = inputs @@ -411,7 +415,9 @@ def EfficientNet( def round_filters(filters, divisor=depth_divisor): """Round number of filters based on depth multiplier.""" filters *= width_coefficient - new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor) + new_filters = max( + divisor, int(filters + divisor / 2) // divisor * divisor + ) # Make sure that round down does not go down by more than 10%. 
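(The `if` check that follows this comment bumps `new_filters` back up by one `divisor` whenever rounding would drop more than 10% of the scaled channel count.) A worked walk-through of the rounding rule with hypothetical numbers, not library code:

```python
# Hypothetical walk-through of the round_filters rule shown above.
width_coefficient, divisor, filters = 1.1, 8, 32
filters *= width_coefficient                       # 35.2
new_filters = max(divisor, int(filters + divisor / 2) // divisor * divisor)
# int(35.2 + 4.0) // 8 * 8 -> 39 // 8 * 8 -> 32
if new_filters < 0.9 * filters:                    # 32 >= 31.68, no bump
    new_filters += divisor
print(new_filters)  # 32: channel widths stay multiples of `divisor`
```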
if new_filters < 0.9 * filters: new_filters += divisor diff --git a/keras_cv/models/mlp_mixer.py b/keras_cv/models/mlp_mixer.py index d79878f5d0..fac6fe6e72 100644 --- a/keras_cv/models/mlp_mixer.py +++ b/keras_cv/models/mlp_mixer.py @@ -225,7 +225,9 @@ def MLPMixer( raise ValueError("Non-uniform resolutions are not supported.") if input_shape[0] % patch_size != 0: - raise ValueError("Input resolution should be divisible by the patch size.") + raise ValueError( + "Input resolution should be divisible by the patch size." + ) inputs = utils.parse_model_inputs(input_shape, input_tensor) @@ -243,15 +245,17 @@ def MLPMixer( x = layers.Reshape((x.shape[1] * x.shape[2], x.shape[3]))(x) for i in range(num_blocks): - x = MixerBlock(tokens_mlp_dim, channels_mlp_dim, name=f"mixer_block_{i}")(x) + x = MixerBlock( + tokens_mlp_dim, channels_mlp_dim, name=f"mixer_block_{i}" + )(x) x = layers.LayerNormalization()(x) if include_top: x = layers.GlobalAveragePooling1D(name="avg_pool")(x) - x = layers.Dense(classes, activation=classifier_activation, name="predictions")( - x - ) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) elif pooling == "avg": x = layers.GlobalAveragePooling1D(name="avg_pool")(x) diff --git a/keras_cv/models/mobilenet_v3.py b/keras_cv/models/mobilenet_v3.py index 518217384a..d8df3b7a53 100644 --- a/keras_cv/models/mobilenet_v3.py +++ b/keras_cv/models/mobilenet_v3.py @@ -154,7 +154,14 @@ def apply(x): def InvertedResBlock( - expansion, filters, kernel_size, stride, se_ratio, activation, block_id, name=None + expansion, + filters, + kernel_size, + stride, + se_ratio, + activation, + block_id, + name=None, ): """An Inverted Residual Block. @@ -403,9 +410,13 @@ def MobileNetV3( if dropout_rate > 0: x = layers.Dropout(dropout_rate)(x) - x = layers.Conv2D(classes, kernel_size=1, padding="same", name="Logits")(x) + x = layers.Conv2D( + classes, kernel_size=1, padding="same", name="Logits" + )(x) x = layers.Flatten()(x) - x = layers.Activation(activation=classifier_activation, name="Predictions")(x) + x = layers.Activation( + activation=classifier_activation, name="Predictions" + )(x) elif pooling == "avg": x = layers.GlobalAveragePooling2D(name="avg_pool")(x) elif pooling == "max": @@ -435,37 +446,39 @@ def MobileNetV3Small( **kwargs, ): def stack_fn(x, kernel, activation, se_ratio): - x = InvertedResBlock(1, depth(16 * alpha), 3, 2, se_ratio, layers.ReLU(), 0)(x) + x = InvertedResBlock( + 1, depth(16 * alpha), 3, 2, se_ratio, layers.ReLU(), 0 + )(x) x = InvertedResBlock( 72.0 / 16, depth(24 * alpha), 3, 2, None, layers.ReLU(), 1 )(x) x = InvertedResBlock( 88.0 / 24, depth(24 * alpha), 3, 1, None, layers.ReLU(), 2 )(x) - x = InvertedResBlock(4, depth(40 * alpha), kernel, 2, se_ratio, activation, 3)( - x - ) - x = InvertedResBlock(6, depth(40 * alpha), kernel, 1, se_ratio, activation, 4)( - x - ) - x = InvertedResBlock(6, depth(40 * alpha), kernel, 1, se_ratio, activation, 5)( - x - ) - x = InvertedResBlock(3, depth(48 * alpha), kernel, 1, se_ratio, activation, 6)( - x - ) - x = InvertedResBlock(3, depth(48 * alpha), kernel, 1, se_ratio, activation, 7)( - x - ) - x = InvertedResBlock(6, depth(96 * alpha), kernel, 2, se_ratio, activation, 8)( - x - ) - x = InvertedResBlock(6, depth(96 * alpha), kernel, 1, se_ratio, activation, 9)( - x - ) - x = InvertedResBlock(6, depth(96 * alpha), kernel, 1, se_ratio, activation, 10)( - x - ) + x = InvertedResBlock( + 4, depth(40 * alpha), kernel, 2, se_ratio, activation, 3 + )(x) + x = InvertedResBlock( + 6, depth(40 
* alpha), kernel, 1, se_ratio, activation, 4 + )(x) + x = InvertedResBlock( + 6, depth(40 * alpha), kernel, 1, se_ratio, activation, 5 + )(x) + x = InvertedResBlock( + 3, depth(48 * alpha), kernel, 1, se_ratio, activation, 6 + )(x) + x = InvertedResBlock( + 3, depth(48 * alpha), kernel, 1, se_ratio, activation, 7 + )(x) + x = InvertedResBlock( + 6, depth(96 * alpha), kernel, 2, se_ratio, activation, 8 + )(x) + x = InvertedResBlock( + 6, depth(96 * alpha), kernel, 1, se_ratio, activation, 9 + )(x) + x = InvertedResBlock( + 6, depth(96 * alpha), kernel, 1, se_ratio, activation, 10 + )(x) return x return MobileNetV3( @@ -504,9 +517,15 @@ def MobileNetV3Large( **kwargs, ): def stack_fn(x, kernel, activation, se_ratio): - x = InvertedResBlock(1, depth(16 * alpha), 3, 1, None, layers.ReLU(), 0)(x) - x = InvertedResBlock(4, depth(24 * alpha), 3, 2, None, layers.ReLU(), 1)(x) - x = InvertedResBlock(3, depth(24 * alpha), 3, 1, None, layers.ReLU(), 2)(x) + x = InvertedResBlock( + 1, depth(16 * alpha), 3, 1, None, layers.ReLU(), 0 + )(x) + x = InvertedResBlock( + 4, depth(24 * alpha), 3, 2, None, layers.ReLU(), 1 + )(x) + x = InvertedResBlock( + 3, depth(24 * alpha), 3, 1, None, layers.ReLU(), 2 + )(x) x = InvertedResBlock( 3, depth(40 * alpha), kernel, 2, se_ratio, layers.ReLU(), 3 )(x) @@ -517,11 +536,21 @@ def stack_fn(x, kernel, activation, se_ratio): 3, depth(40 * alpha), kernel, 1, se_ratio, layers.ReLU(), 5 )(x) x = InvertedResBlock(6, depth(80 * alpha), 3, 2, None, activation, 6)(x) - x = InvertedResBlock(2.5, depth(80 * alpha), 3, 1, None, activation, 7)(x) - x = InvertedResBlock(2.3, depth(80 * alpha), 3, 1, None, activation, 8)(x) - x = InvertedResBlock(2.3, depth(80 * alpha), 3, 1, None, activation, 9)(x) - x = InvertedResBlock(6, depth(112 * alpha), 3, 1, se_ratio, activation, 10)(x) - x = InvertedResBlock(6, depth(112 * alpha), 3, 1, se_ratio, activation, 11)(x) + x = InvertedResBlock(2.5, depth(80 * alpha), 3, 1, None, activation, 7)( + x + ) + x = InvertedResBlock(2.3, depth(80 * alpha), 3, 1, None, activation, 8)( + x + ) + x = InvertedResBlock(2.3, depth(80 * alpha), 3, 1, None, activation, 9)( + x + ) + x = InvertedResBlock( + 6, depth(112 * alpha), 3, 1, se_ratio, activation, 10 + )(x) + x = InvertedResBlock( + 6, depth(112 * alpha), 3, 1, se_ratio, activation, 11 + )(x) x = InvertedResBlock( 6, depth(160 * alpha), kernel, 2, se_ratio, activation, 12 )(x) @@ -552,5 +581,9 @@ def stack_fn(x, kernel, activation, se_ratio): ) -setattr(MobileNetV3Large, "__doc__", BASE_DOCSTRING.format(name="MobileNetV3Large")) -setattr(MobileNetV3Small, "__doc__", BASE_DOCSTRING.format(name="MobileNetV3Small")) +setattr( + MobileNetV3Large, "__doc__", BASE_DOCSTRING.format(name="MobileNetV3Large") +) +setattr( + MobileNetV3Small, "__doc__", BASE_DOCSTRING.format(name="MobileNetV3Small") +) diff --git a/keras_cv/models/models_test.py b/keras_cv/models/models_test.py index 52fa1d361c..e63e526842 100644 --- a/keras_cv/models/models_test.py +++ b/keras_cv/models/models_test.py @@ -33,7 +33,9 @@ def cleanup_global_session(self): def _test_application_base(self, app, _, args): # Can be instantiated with default arguments - model = app(include_top=True, classes=1000, include_rescaling=False, **args) + model = app( + include_top=True, classes=1000, include_rescaling=False, **args + ) # Can be serialized and deserialized config = model.get_config() @@ -49,7 +51,9 @@ def _test_application_with_rescaling(self, app, last_dim, args): self.assertIsNotNone(model.get_layer(name="rescaling")) def 
_test_application_pooling(self, app, last_dim, args): - model = app(include_rescaling=False, include_top=False, pooling="avg", **args) + model = app( + include_rescaling=False, include_top=False, pooling="avg", **args + ) self.assertShapeEqual(model.output_shape, (None, last_dim)) diff --git a/keras_cv/models/object_detection/__internal__.py b/keras_cv/models/object_detection/__internal__.py index a707a34b39..243638e4f2 100644 --- a/keras_cv/models/object_detection/__internal__.py +++ b/keras_cv/models/object_detection/__internal__.py @@ -38,7 +38,9 @@ def _get_tensor_types(): return (tf.Tensor, np.ndarray, pd.Series, pd.DataFrame) -def convert_inputs_to_tf_dataset(x=None, y=None, sample_weight=None, batch_size=None): +def convert_inputs_to_tf_dataset( + x=None, y=None, sample_weight=None, batch_size=None +): if sample_weight is not None: raise ValueError("RetinaNet does not yet support `sample_weight`.") diff --git a/keras_cv/models/object_detection/__test_utils__.py b/keras_cv/models/object_detection/__test_utils__.py index 3e62c7b2b8..1fb0e6d767 100644 --- a/keras_cv/models/object_detection/__test_utils__.py +++ b/keras_cv/models/object_detection/__test_utils__.py @@ -16,7 +16,9 @@ import keras_cv -def _create_bounding_box_dataset(bounding_box_format, use_dictionary_box_format=False): +def _create_bounding_box_dataset( + bounding_box_format, use_dictionary_box_format=False +): # Just about the easiest dataset you can have, all classes are 0, all boxes are # exactly the same. [1, 1, 2, 2] are the coordinates in xyxy xs = tf.ones((5, 256, 256, 3), dtype=tf.float32) @@ -27,7 +29,11 @@ def _create_bounding_box_dataset(bounding_box_format, use_dictionary_box_format= ys = tf.expand_dims(ys, axis=0) ys = tf.tile(ys, [5, 10, 1]) ys = keras_cv.bounding_box.convert_format( - ys, source="rel_xywh", target=bounding_box_format, images=xs, dtype=tf.float32 + ys, + source="rel_xywh", + target=bounding_box_format, + images=xs, + dtype=tf.float32, ) num_dets = tf.ones([5]) diff --git a/keras_cv/models/object_detection/faster_rcnn.py b/keras_cv/models/object_detection/faster_rcnn.py index 6e39cd1dba..db9f865125 100644 --- a/keras_cv/models/object_detection/faster_rcnn.py +++ b/keras_cv/models/object_detection/faster_rcnn.py @@ -73,7 +73,13 @@ def call(self, inputs, training=None): p3_output = self.conv_c3_3x3(p3_output) p2_output = self.conv_c2_3x3(p2_output) - return {2: p2_output, 3: p3_output, 4: p4_output, 5: p5_output, 6: p6_output} + return { + 2: p2_output, + 3: p3_output, + 4: p4_output, + 5: p5_output, + 6: p6_output, + } def get_config(self): config = {} @@ -188,7 +194,9 @@ def __init__( layer = tf.keras.layers.Dense(units=fc_dim, activation="relu") self.fcs.append(layer) self.box_pred = tf.keras.layers.Dense(units=4) - self.cls_score = tf.keras.layers.Dense(units=classes + 1, activation="softmax") + self.cls_score = tf.keras.layers.Dense( + units=classes + 1, activation="softmax" + ) def call(self, feature_map, training=None): x = feature_map @@ -299,7 +307,9 @@ def __init__( nms_score_threshold_train=float("-inf"), nms_score_threshold_test=float("-inf"), ) - self.box_matcher = BoxMatcher(thresholds=[0.0, 0.5], match_values=[-2, -1, 1]) + self.box_matcher = BoxMatcher( + thresholds=[0.0, 0.5], match_values=[-2, -1, 1] + ) self.roi_sampler = _ROISampler( bounding_box_format="yxyx", roi_matcher=self.box_matcher, @@ -348,7 +358,9 @@ def _call_rpn(self, images, anchors, training=None): box_format="yxyx", variance=BOX_VARIANCE, ) - rois, _ = self.roi_generator(decoded_rpn_boxes, rpn_scores, 
training=training) + rois, _ = self.roi_generator( + decoded_rpn_boxes, rpn_scores, training=training + ) rois = _clip_boxes(rois, "yxyx", image_shape) rpn_boxes = tf.concat(tf.nest.flatten(rpn_boxes), axis=1) rpn_scores = tf.concat(tf.nest.flatten(rpn_scores), axis=1) @@ -361,14 +373,20 @@ def _call_rcnn(self, rois, feature_map, training=None): feature_map, tf.concat([tf.shape(rois)[:2], [-1]], axis=0) ) # [BS, H*W*K, 4], [BS, H*W*K, num_classes + 1] - rcnn_box_pred, rcnn_cls_pred = self.rcnn_head(feature_map, training=training) + rcnn_box_pred, rcnn_cls_pred = self.rcnn_head( + feature_map, training=training + ) return rcnn_box_pred, rcnn_cls_pred def call(self, images, training=None): image_shape = tf.shape(images[0]) anchors = self.anchor_generator(image_shape=image_shape) - rois, feature_map, _, _ = self._call_rpn(images, anchors, training=training) - box_pred, cls_pred = self._call_rcnn(rois, feature_map, training=training) + rois, feature_map, _, _ = self._call_rpn( + images, anchors, training=training + ) + box_pred, cls_pred = self._call_rcnn( + rois, feature_map, training=training + ) if not training: # box_pred is on "center_yxhw" format, convert to target format. box_pred = _decode_deltas_to_boxes( @@ -453,18 +471,26 @@ def compute_loss(self, images, boxes, classes, training): ) = self.rpn_labeler( tf.concat(tf.nest.flatten(anchors), axis=0), boxes, classes ) - rpn_box_weights /= self.rpn_labeler.samples_per_image * global_batch * 0.25 + rpn_box_weights /= ( + self.rpn_labeler.samples_per_image * global_batch * 0.25 + ) rpn_cls_weights /= self.rpn_labeler.samples_per_image * global_batch rois, feature_map, rpn_box_pred, rpn_cls_pred = self._call_rpn( images, anchors, training=training ) rois = tf.stop_gradient(rois) - rois, box_targets, box_weights, cls_targets, cls_weights = self.roi_sampler( - rois, boxes, classes - ) + ( + rois, + box_targets, + box_weights, + cls_targets, + cls_weights, + ) = self.roi_sampler(rois, boxes, classes) box_weights /= self.roi_sampler.num_sampled_rois * global_batch * 0.25 cls_weights /= self.roi_sampler.num_sampled_rois * global_batch - box_pred, cls_pred = self._call_rcnn(rois, feature_map, training=training) + box_pred, cls_pred = self._call_rcnn( + rois, feature_map, training=training + ) y_true = { "rpn_box": rpn_box_targets, "rpn_classification": rpn_cls_targets, @@ -499,12 +525,16 @@ def train_step(self, data): # TODO(tanzhenyu): remove this hack and perform broadcasting elsewhere classes = tf.expand_dims(y["classes"], axis=-1) with tf.GradientTape() as tape: - total_loss = self.compute_loss(images, boxes, classes, training=True) + total_loss = self.compute_loss( + images, boxes, classes, training=True + ) reg_losses = [] if self.weight_decay: for var in self.trainable_variables: if "bn" not in var.name: - reg_losses.append(self.weight_decay * tf.nn.l2_loss(var)) + reg_losses.append( + self.weight_decay * tf.nn.l2_loss(var) + ) l2_loss = tf.math.add_n(reg_losses) total_loss += l2_loss self.optimizer.minimize(total_loss, self.trainable_variables, tape=tape) diff --git a/keras_cv/models/object_detection/faster_rcnn_test.py b/keras_cv/models/object_detection/faster_rcnn_test.py index 2a30e72b7b..74fd4f1a2f 100644 --- a/keras_cv/models/object_detection/faster_rcnn_test.py +++ b/keras_cv/models/object_detection/faster_rcnn_test.py @@ -21,7 +21,9 @@ import keras_cv from keras_cv.models import ResNet50V2 -from keras_cv.models.object_detection.__test_utils__ import _create_bounding_box_dataset +from keras_cv.models.object_detection.__test_utils__ 
import ( + _create_bounding_box_dataset, +) from keras_cv.models.object_detection.faster_rcnn import FasterRCNN @@ -33,7 +35,9 @@ class FasterRCNNTest(tf.test.TestCase, parameterized.TestCase): ) def test_faster_rcnn_infer(self, batch_shape): model = FasterRCNN( - classes=80, bounding_box_format="xyxy", backbone=self._build_backbone() + classes=80, + bounding_box_format="xyxy", + backbone=self._build_backbone(), ) images = tf.random.normal(batch_shape) outputs = model(images, training=False) @@ -48,7 +52,9 @@ def test_faster_rcnn_infer(self, batch_shape): ) def test_faster_rcnn_train(self, batch_shape): model = FasterRCNN( - classes=80, bounding_box_format="xyxy", backbone=self._build_backbone() + classes=80, + bounding_box_format="xyxy", + backbone=self._build_backbone(), ) images = tf.random.normal(batch_shape) outputs = model(images, training=True) @@ -57,7 +63,9 @@ def test_faster_rcnn_train(self, batch_shape): def test_invalid_compile(self): model = FasterRCNN( - classes=80, bounding_box_format="yxyx", backbone=self._build_backbone() + classes=80, + bounding_box_format="yxyx", + backbone=self._build_backbone(), ) with self.assertRaisesRegex(ValueError, "only accepts"): model.compile(rpn_box_loss="binary_crossentropy") @@ -98,4 +106,6 @@ def test_faster_rcnn_with_dictionary_input_format(self): faster_rcnn.evaluate(dataset) def _build_backbone(self): - return ResNet50V2(include_top=False, include_rescaling=True).as_backbone() + return ResNet50V2( + include_top=False, include_rescaling=True + ).as_backbone() diff --git a/keras_cv/models/object_detection/predict_utils.py b/keras_cv/models/object_detection/predict_utils.py index ad0e9a59fc..b4b597474f 100644 --- a/keras_cv/models/object_detection/predict_utils.py +++ b/keras_cv/models/object_detection/predict_utils.py @@ -33,7 +33,9 @@ def run_step(data): return outputs if model._jit_compile: - run_step = tf.function(run_step, jit_compile=True, reduce_retracing=True) + run_step = tf.function( + run_step, jit_compile=True, reduce_retracing=True + ) data = next(iterator) outputs = model.distribute_strategy.run(run_step, args=(data,)) diff --git a/keras_cv/models/object_detection/retina_net/__internal__/layers/prediction_head.py b/keras_cv/models/object_detection/retina_net/__internal__/layers/prediction_head.py index c3550acdbd..0aeff32429 100644 --- a/keras_cv/models/object_detection/retina_net/__internal__/layers/prediction_head.py +++ b/keras_cv/models/object_detection/retina_net/__internal__/layers/prediction_head.py @@ -29,7 +29,9 @@ class PredictionHead(layers.Layer): or the box regression head depending on `output_filters`. 
""" - def __init__(self, output_filters, bias_initializer, num_conv_layers=3, **kwargs): + def __init__( + self, output_filters, bias_initializer, num_conv_layers=3, **kwargs + ): super().__init__(**kwargs) self.output_filters = output_filters self.bias_initializer = bias_initializer diff --git a/keras_cv/models/object_detection/retina_net/retina_net.py b/keras_cv/models/object_detection/retina_net/retina_net.py index 8d3708eb6d..d753889a3b 100644 --- a/keras_cv/models/object_detection/retina_net/retina_net.py +++ b/keras_cv/models/object_detection/retina_net/retina_net.py @@ -105,7 +105,9 @@ def __init__( name="RetinaNet", **kwargs, ): - if anchor_generator is not None and (prediction_decoder or label_encoder): + if anchor_generator is not None and ( + prediction_decoder or label_encoder + ): raise ValueError( "`anchor_generator` is only to be provided when " "both `label_encoder` and `prediction_decoder` are both `None`. " @@ -116,8 +118,9 @@ def __init__( "`prediction_decoder` you should provide both to `RetinaNet`, and ensure " "that the `anchor_generator` provided to both is identical" ) - anchor_generator = anchor_generator or RetinaNet.default_anchor_generator( - bounding_box_format + anchor_generator = ( + anchor_generator + or RetinaNet.default_anchor_generator(bounding_box_format) ) label_encoder = label_encoder or cv_layers.RetinaNetLabelEncoder( bounding_box_format=bounding_box_format, @@ -159,8 +162,11 @@ def __init__( self.feature_pyramid = feature_pyramid or layers_lib.FeaturePyramid() prior_probability = tf.constant_initializer(-np.log((1 - 0.01) / 0.01)) - self.classification_head = classification_head or layers_lib.PredictionHead( - output_filters=9 * classes, bias_initializer=prior_probability + self.classification_head = ( + classification_head + or layers_lib.PredictionHead( + output_filters=9 * classes, bias_initializer=prior_probability + ) ) self.box_head = box_head or layers_lib.PredictionHead( @@ -203,7 +209,9 @@ def _forward(self, images, training=None): box_pred = [] for feature in features: box_pred.append( - tf.reshape(self.box_head(feature, training=training), [N, -1, 4]) + tf.reshape( + self.box_head(feature, training=training), [N, -1, 4] + ) ) cls_pred.append( tf.reshape( @@ -345,7 +353,9 @@ def compute_loss(self, images, boxes, classes, training): positive_mask = tf.cast(tf.greater(classes, -1.0), dtype=tf.float32) normalizer = tf.reduce_sum(positive_mask) - cls_weights = tf.cast(tf.math.not_equal(classes, -2.0), dtype=tf.float32) + cls_weights = tf.cast( + tf.math.not_equal(classes, -2.0), dtype=tf.float32 + ) cls_weights /= normalizer box_weights = positive_mask / normalizer y_true = { @@ -388,7 +398,9 @@ def train_step(self, data): if self.weight_decay: for var in self.trainable_variables: if "bn" not in var.name: - reg_losses.append(self.weight_decay * tf.nn.l2_loss(var)) + reg_losses.append( + self.weight_decay * tf.nn.l2_loss(var) + ) l2_loss = tf.math.add_n(reg_losses) total_loss += l2_loss # Training specific code diff --git a/keras_cv/models/object_detection/retina_net/retina_net_inference_test.py b/keras_cv/models/object_detection/retina_net/retina_net_inference_test.py index 9a66ca817b..a860807415 100644 --- a/keras_cv/models/object_detection/retina_net/retina_net_inference_test.py +++ b/keras_cv/models/object_detection/retina_net/retina_net_inference_test.py @@ -41,7 +41,9 @@ def cleanup_global_session(self): def test_weight_setting(self): x, y = _create_bounding_box_dataset(bounding_box_format="xywh") - pretrained_retina_net, new_retina_net = 
_create_retina_nets(x, y, epochs=1) + pretrained_retina_net, new_retina_net = _create_retina_nets( + x, y, epochs=1 + ) new_retina_net.set_weights(pretrained_retina_net.get_weights()) @@ -82,20 +84,28 @@ def test_decoder_doesnt_get_updated(self): pretrained_decoder.suppression_layer.iou_threshold, ) - @pytest.mark.skipif(os.name == "nt", reason="tempfile does not work on windows") + @pytest.mark.skipif( + os.name == "nt", reason="tempfile does not work on windows" + ) def test_savedmodel_creation(self): x, y = _create_bounding_box_dataset(bounding_box_format="xywh") - pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=1) + pretrained_retina_net, new_retina_net = _create_retina_nets( + x, y, epochs=1 + ) tmp = tempfile.mkdtemp() pretrained_retina_net.save(f"{tmp}/checkpoint/") load_model = tf.saved_model.load(f"{tmp}/checkpoint/") _ = load_model(x) - @pytest.mark.skipif(os.name == "nt", reason="tempfile does not work on windows") + @pytest.mark.skipif( + os.name == "nt", reason="tempfile does not work on windows" + ) def test_savedmodel_format_weight_loading(self): x, y = _create_bounding_box_dataset(bounding_box_format="xywh") - pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=1) + pretrained_retina_net, new_retina_net = _create_retina_nets( + x, y, epochs=1 + ) tmp = tempfile.mkdtemp() pretrained_retina_net.save_weights(f"{tmp}/checkpoint/") @@ -127,10 +137,14 @@ def test_set_prediction_decoder(self): pretrained_retina_net.prediction_decoder = prediction_decoder _ = pretrained_retina_net.predict(x) - @pytest.mark.skipif(os.name == "nt", reason="tempfile does not work on windows") + @pytest.mark.skipif( + os.name == "nt", reason="tempfile does not work on windows" + ) def test_weight_loading(self): x, y = _create_bounding_box_dataset(bounding_box_format="xywh") - pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=1) + pretrained_retina_net, new_retina_net = _create_retina_nets( + x, y, epochs=1 + ) tmp = tempfile.mkdtemp() pretrained_retina_net.save_weights(f"{tmp}/checkpoint.h5") @@ -157,7 +171,9 @@ def test_weight_loading(self): def test_weight_loading_via_metrics(self): x, y = _create_bounding_box_dataset(bounding_box_format="xywh") - pretrained_retina_net, new_retina_net = _create_retina_nets(x, y, epochs=30) + pretrained_retina_net, new_retina_net = _create_retina_nets( + x, y, epochs=30 + ) tmp = tempfile.mkdtemp() pretrained_retina_net.save_weights(f"{tmp}/checkpoint.h5") @@ -255,6 +271,10 @@ def _create_bounding_box_dataset(bounding_box_format): ys = tf.tile(ys, [10, 10, 1]) ys = keras_cv.bounding_box.convert_format( - ys, source="rel_xywh", target=bounding_box_format, images=xs, dtype=tf.float32 + ys, + source="rel_xywh", + target=bounding_box_format, + images=xs, + dtype=tf.float32, ) return xs, {"boxes": ys, "classes": y_classes} diff --git a/keras_cv/models/object_detection/retina_net/retina_net_test.py b/keras_cv/models/object_detection/retina_net/retina_net_test.py index 057189a540..5184398230 100644 --- a/keras_cv/models/object_detection/retina_net/retina_net_test.py +++ b/keras_cv/models/object_detection/retina_net/retina_net_test.py @@ -19,7 +19,9 @@ from tensorflow.keras import optimizers import keras_cv -from keras_cv.models.object_detection.__test_utils__ import _create_bounding_box_dataset +from keras_cv.models.object_detection.__test_utils__ import ( + _create_bounding_box_dataset, +) class RetinaNetTest(tf.test.TestCase): @@ -80,7 +82,9 @@ def test_wrong_logits(self): 
classification_loss=keras_cv.losses.FocalLoss( from_logits=False, reduction="none" ), - box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"), + box_loss=keras_cv.losses.SmoothL1Loss( + l1_cutoff=1.0, reduction="none" + ), ) def test_no_metrics(self): @@ -95,7 +99,9 @@ def test_no_metrics(self): classification_loss=keras_cv.losses.FocalLoss( from_logits=True, reduction="none" ), - box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"), + box_loss=keras_cv.losses.SmoothL1Loss( + l1_cutoff=1.0, reduction="none" + ), ) def test_weights_contained_in_trainable_variables(self): @@ -111,7 +117,9 @@ def test_weights_contained_in_trainable_variables(self): classification_loss=keras_cv.losses.FocalLoss( from_logits=True, reduction="none" ), - box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"), + box_loss=keras_cv.losses.SmoothL1Loss( + l1_cutoff=1.0, reduction="none" + ), ) xs, ys = _create_bounding_box_dataset(bounding_box_format) @@ -119,9 +127,13 @@ def test_weights_contained_in_trainable_variables(self): _ = retina_net(xs) variable_names = [x.name for x in retina_net.trainable_variables] # classification_head - self.assertIn("RetinaNet/prediction_head/conv2d_8/kernel:0", variable_names) + self.assertIn( + "RetinaNet/prediction_head/conv2d_8/kernel:0", variable_names + ) # box_head - self.assertIn("RetinaNet/prediction_head_1/conv2d_12/kernel:0", variable_names) + self.assertIn( + "RetinaNet/prediction_head_1/conv2d_12/kernel:0", variable_names + ) def test_weights_change(self): bounding_box_format = "xywh" @@ -136,7 +148,9 @@ def test_weights_change(self): classification_loss=keras_cv.losses.FocalLoss( from_logits=True, reduction="none" ), - box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"), + box_loss=keras_cv.losses.SmoothL1Loss( + l1_cutoff=1.0, reduction="none" + ), ) xs, ys = _create_bounding_box_dataset(bounding_box_format) @@ -156,11 +170,14 @@ def test_weights_change(self): ) for w1, w2 in zip( - original_classification_head_weights, classification_head_after_fit_weights + original_classification_head_weights, + classification_head_after_fit_weights, ): self.assertNotAllClose(w1, w2) - for w1, w2 in zip(original_box_head_weights, box_head_after_fit_weights): + for w1, w2 in zip( + original_box_head_weights, box_head_after_fit_weights + ): self.assertNotAllClose(w1, w2) for w1, w2 in zip(original_fpn_weights, fpn_after_fit): @@ -188,7 +205,9 @@ def test_fit_coco_metrics(self): classification_loss=keras_cv.losses.FocalLoss( from_logits=True, reduction="none" ), - box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"), + box_loss=keras_cv.losses.SmoothL1Loss( + l1_cutoff=1.0, reduction="none" + ), ) xs, ys = _create_bounding_box_dataset(bounding_box_format) @@ -218,7 +237,9 @@ def test_retina_net_with_dictionary_input_format(self): classification_loss=keras_cv.losses.FocalLoss( from_logits=True, reduction="none" ), - box_loss=keras_cv.losses.SmoothL1Loss(l1_cutoff=1.0, reduction="none"), + box_loss=keras_cv.losses.SmoothL1Loss( + l1_cutoff=1.0, reduction="none" + ), ) retina_net.fit(dataset, epochs=1) diff --git a/keras_cv/models/object_detection_3d/center_pillar.py b/keras_cv/models/object_detection_3d/center_pillar.py index 1ea4bc5cf4..6aed94be3c 100644 --- a/keras_cv/models/object_detection_3d/center_pillar.py +++ b/keras_cv/models/object_detection_3d/center_pillar.py @@ -169,7 +169,9 @@ def call(self, point_xyz, point_feature, point_mask, training=None): # returns dict {"class_1": concat_pred_1, 
"class_2": concat_pred_2} return predictions - def compute_loss(self, predictions, box_dict, heatmap_dict, top_k_index_dict): + def compute_loss( + self, predictions, box_dict, heatmap_dict, top_k_index_dict + ): y_pred = {} y_true = {} sample_weight = {} @@ -204,7 +206,10 @@ def train_step(self, data): losses = [] with tf.GradientTape() as tape: predictions = self( - x["point_xyz"], x["point_feature"], x["point_mask"], training=True + x["point_xyz"], + x["point_feature"], + x["point_mask"], + training=True, ) losses.append( self.compute_loss( diff --git a/keras_cv/models/object_detection_3d/center_pillar_test.py b/keras_cv/models/object_detection_3d/center_pillar_test.py index aa1b711067..3d64fc7a90 100644 --- a/keras_cv/models/object_detection_3d/center_pillar_test.py +++ b/keras_cv/models/object_detection_3d/center_pillar_test.py @@ -17,9 +17,15 @@ from keras_cv.layers.object_detection_3d.voxelization import DynamicVoxelization from keras_cv.models.__internal__.unet import Block from keras_cv.models.__internal__.unet import UNet -from keras_cv.models.object_detection_3d.center_pillar import MultiClassDetectionHead -from keras_cv.models.object_detection_3d.center_pillar import MultiClassHeatmapDecoder -from keras_cv.models.object_detection_3d.center_pillar import MultiHeadCenterPillar +from keras_cv.models.object_detection_3d.center_pillar import ( + MultiClassDetectionHead, +) +from keras_cv.models.object_detection_3d.center_pillar import ( + MultiClassHeatmapDecoder, +) +from keras_cv.models.object_detection_3d.center_pillar import ( + MultiHeadCenterPillar, +) down_block_configs = [(128, 6), (256, 2), (512, 2)] up_block_configs = [512, 256, 256] @@ -50,9 +56,9 @@ def build_centerpillar_unet(self, input_shape): )(x) x = tf.keras.layers.ReLU()(x) x = Block(128, downsample=False, sync_bn=False)(x) - output = UNet(x.shape[1:], down_block_configs, up_block_configs, sync_bn=False)( - x - ) + output = UNet( + x.shape[1:], down_block_configs, up_block_configs, sync_bn=False + )(x) return tf.keras.Model(input, output) def test_center_pillar_call(self): diff --git a/keras_cv/models/regnet.py b/keras_cv/models/regnet.py index 7f156cf759..e35e86228a 100644 --- a/keras_cv/models/regnet.py +++ b/keras_cv/models/regnet.py @@ -462,7 +462,9 @@ def apply(inputs): x = layers.ReLU(name=name + "_conv_3x3_relu")(x) # Squeeze-Excitation block - x = SqueezeAndExcite2D(filters_out, ratio=squeeze_excite_ratio, name=name)(x) + x = SqueezeAndExcite2D( + filters_out, ratio=squeeze_excite_ratio, name=name + )(x) # conv_1x1_2 x = layers.Conv2D( @@ -667,7 +669,9 @@ def Head(classes=None, name=None, activation=None): def apply(x): x = layers.GlobalAveragePooling2D(name=name + "_head_gap")(x) - x = layers.Dense(classes, name=name + "head_dense", activation=activation)(x) + x = layers.Dense( + classes, name=name + "head_dense", activation=activation + )(x) return x return apply @@ -786,7 +790,9 @@ def RegNet( elif pooling == "max": x = layers.GlobalMaxPooling2D()(x) - model = tf.keras.Model(inputs=img_input, outputs=x, name=model_name, **kwargs) + model = tf.keras.Model( + inputs=img_input, outputs=x, name=model_name, **kwargs + ) # Load weights. 
if weights is not None: diff --git a/keras_cv/models/resnet_v1.py b/keras_cv/models/resnet_v1.py index f5bd147475..a1925e4051 100644 --- a/keras_cv/models/resnet_v1.py +++ b/keras_cv/models/resnet_v1.py @@ -118,7 +118,11 @@ def BasicBlock(filters, kernel_size=3, stride=1, conv_shortcut=True, name=None): def apply(x): if conv_shortcut: shortcut = layers.Conv2D( - filters, 1, strides=stride, use_bias=False, name=name + "_0_conv" + filters, + 1, + strides=stride, + use_bias=False, + name=name + "_0_conv", )(x) shortcut = layers.BatchNormalization( axis=BN_AXIS, epsilon=1.001e-5, name=name + "_0_bn" @@ -140,7 +144,11 @@ def apply(x): x = layers.Activation("relu", name=name + "_1_relu")(x) x = layers.Conv2D( - filters, kernel_size, padding="SAME", use_bias=False, name=name + "_2_conv" + filters, + kernel_size, + padding="SAME", + use_bias=False, + name=name + "_2_conv", )(x) x = layers.BatchNormalization( axis=BN_AXIS, epsilon=1.001e-5, name=name + "_2_bn" @@ -172,7 +180,11 @@ def Block(filters, kernel_size=3, stride=1, conv_shortcut=True, name=None): def apply(x): if conv_shortcut: shortcut = layers.Conv2D( - 4 * filters, 1, strides=stride, use_bias=False, name=name + "_0_conv" + 4 * filters, + 1, + strides=stride, + use_bias=False, + name=name + "_0_conv", )(x) shortcut = layers.BatchNormalization( axis=BN_AXIS, epsilon=1.001e-5, name=name + "_0_bn" @@ -189,14 +201,20 @@ def apply(x): x = layers.Activation("relu", name=name + "_1_relu")(x) x = layers.Conv2D( - filters, kernel_size, padding="SAME", use_bias=False, name=name + "_2_conv" + filters, + kernel_size, + padding="SAME", + use_bias=False, + name=name + "_2_conv", )(x) x = layers.BatchNormalization( axis=BN_AXIS, epsilon=1.001e-5, name=name + "_2_bn" )(x) x = layers.Activation("relu", name=name + "_2_relu")(x) - x = layers.Conv2D(4 * filters, 1, use_bias=False, name=name + "_3_conv")(x) + x = layers.Conv2D( + 4 * filters, 1, use_bias=False, name=name + "_3_conv" + )(x) x = layers.BatchNormalization( axis=BN_AXIS, epsilon=1.001e-5, name=name + "_3_bn" )(x) @@ -208,7 +226,9 @@ def apply(x): return apply -def Stack(filters, blocks, stride=2, name=None, block_fn=Block, first_shortcut=True): +def Stack( + filters, blocks, stride=2, name=None, block_fn=Block, first_shortcut=True +): """A set of stacked residual blocks. Args: filters: integer, filters of the layers in a block. 
@@ -226,10 +246,15 @@ def Stack(filters, blocks, stride=2, name=None, block_fn=Block, first_shortcut=T def apply(x): x = block_fn( - filters, stride=stride, name=name + "_block1", conv_shortcut=first_shortcut + filters, + stride=stride, + name=name + "_block1", + conv_shortcut=first_shortcut, )(x) for i in range(2, blocks + 1): - x = block_fn(filters, conv_shortcut=False, name=name + "_block" + str(i))(x) + x = block_fn( + filters, conv_shortcut=False, name=name + "_block" + str(i) + )(x) return x return apply @@ -318,7 +343,9 @@ def ResNet( 64, 7, strides=2, use_bias=False, padding="same", name="conv1_conv" )(x) - x = layers.BatchNormalization(axis=BN_AXIS, epsilon=1.001e-5, name="conv1_bn")(x) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=1.001e-5, name="conv1_bn" + )(x) x = layers.Activation("relu", name="conv1_relu")(x) x = layers.MaxPooling2D(3, strides=2, padding="same", name="pool1_pool")(x) @@ -338,9 +365,9 @@ def ResNet( if include_top: x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - x = layers.Dense(classes, activation=classifier_activation, name="predictions")( - x - ) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) else: if pooling == "avg": x = layers.GlobalAveragePooling2D(name="avg_pool")(x) diff --git a/keras_cv/models/resnet_v2.py b/keras_cv/models/resnet_v2.py index ef40a5a806..01979bee10 100644 --- a/keras_cv/models/resnet_v2.py +++ b/keras_cv/models/resnet_v2.py @@ -105,7 +105,13 @@ def apply_basic_block( - x, filters, kernel_size=3, stride=1, dilation=1, conv_shortcut=False, name=None + x, + filters, + kernel_size=3, + stride=1, + dilation=1, + conv_shortcut=False, + name=None, ): """A basic residual block (v2). @@ -141,7 +147,9 @@ def apply_basic_block( ) else: shortcut = ( - layers.MaxPooling2D(1, strides=stride, name=name + "_0_max_pooling")(x) + layers.MaxPooling2D( + 1, strides=stride, name=name + "_0_max_pooling" + )(x) if s > 1 else x ) @@ -174,7 +182,13 @@ def apply_basic_block( def apply_block( - x, filters, kernel_size=3, stride=1, dilation=1, conv_shortcut=False, name=None + x, + filters, + kernel_size=3, + stride=1, + dilation=1, + conv_shortcut=False, + name=None, ): """A residual block (v2). @@ -212,14 +226,16 @@ def apply_block( )(use_preactivation) else: shortcut = ( - layers.MaxPooling2D(1, strides=stride, name=name + "_0_max_pooling")(x) + layers.MaxPooling2D( + 1, strides=stride, name=name + "_0_max_pooling" + )(x) if s > 1 else x ) - x = layers.Conv2D(filters, 1, strides=1, use_bias=False, name=name + "_1_conv")( - use_preactivation - ) + x = layers.Conv2D( + filters, 1, strides=1, use_bias=False, name=name + "_1_conv" + )(use_preactivation) x = layers.BatchNormalization( axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_1_bn" )(x) @@ -286,9 +302,13 @@ def apply_stack( f"Received block_type={block_type}." 
) - x = block_fn(x, filters, conv_shortcut=first_shortcut, name=name + "_block1") + x = block_fn( + x, filters, conv_shortcut=first_shortcut, name=name + "_block1" + ) for i in range(2, blocks): - x = block_fn(x, filters, dilation=dilations, name=name + "_block" + str(i)) + x = block_fn( + x, filters, dilation=dilations, name=name + "_block" + str(i) + ) x = block_fn( x, filters, @@ -393,7 +413,9 @@ def __init__( name="conv1_conv", )(x) - x = layers.MaxPooling2D(3, strides=2, padding="same", name="pool1_pool")(x) + x = layers.MaxPooling2D( + 3, strides=2, padding="same", name="pool1_pool" + )(x) num_stacks = len(stackwise_filters) if stackwise_dilations is None: @@ -413,9 +435,9 @@ def __init__( ) stack_level_outputs[stack_index + 2] = x - x = layers.BatchNormalization(axis=BN_AXIS, epsilon=BN_EPSILON, name="post_bn")( - x - ) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name="post_bn" + )(x) x = layers.Activation("relu", name="post_relu")(x) if include_top: diff --git a/keras_cv/models/resnet_v2_test.py b/keras_cv/models/resnet_v2_test.py index 545d6a0039..3249dd725d 100644 --- a/keras_cv/models/resnet_v2_test.py +++ b/keras_cv/models/resnet_v2_test.py @@ -61,7 +61,11 @@ def test_model_serialization_tf(self, app, last_dim, args): def test_model_serialization_keras_format(self, app, last_dim, args): if version.parse(tf.__version__) >= version.parse("2.12.0-dev0"): super()._test_model_serialization( - app, last_dim, args, save_format="keras_v3", filename="model.keras" + app, + last_dim, + args, + save_format="keras_v3", + filename="model.keras", ) def test_model_backbone_layer_names_stability(self): diff --git a/keras_cv/models/segmentation/deeplab.py b/keras_cv/models/segmentation/deeplab.py index e5c728faf1..7c7c8a05ef 100644 --- a/keras_cv/models/segmentation/deeplab.py +++ b/keras_cv/models/segmentation/deeplab.py @@ -48,7 +48,10 @@ def build(self, input_shape): width = input_shape[2] feature_map_shape = self.backbone.compute_output_shape(input_shape) self.up_layer = tf.keras.layers.UpSampling2D( - size=(height // feature_map_shape[1], width // feature_map_shape[2]), + size=( + height // feature_map_shape[1], + width // feature_map_shape[2], + ), interpolation="bilinear", ) @@ -66,7 +69,8 @@ def __init__( ): if not isinstance(backbone, tf.keras.layers.Layer): raise ValueError( - "Backbone need to be a `tf.keras.layers.Layer`, " f"received {backbone}" + "Backbone need to be a `tf.keras.layers.Layer`, " + f"received {backbone}" ) if weights and not tf.io.gfile.exists( @@ -95,11 +99,16 @@ def __init__( feature_map = backbone(x) if spatial_pyramid_pooling is None: - spatial_pyramid_pooling = SpatialPyramidPooling(dilation_rates=[6, 12, 18]) + spatial_pyramid_pooling = SpatialPyramidPooling( + dilation_rates=[6, 12, 18] + ) output = spatial_pyramid_pooling(feature_map) output = tf.keras.layers.UpSampling2D( - size=(height // feature_map.shape[1], width // feature_map.shape[2]), + size=( + height // feature_map.shape[1], + width // feature_map.shape[2], + ), interpolation="bilinear", )(output) @@ -142,19 +151,27 @@ def compile(self, weight_decay=0.0001, **kwargs): super().compile(**kwargs) def train_step(self, data): - images, y_true, sample_weight = tf.keras.utils.unpack_x_y_sample_weight(data) + images, y_true, sample_weight = tf.keras.utils.unpack_x_y_sample_weight( + data + ) with tf.GradientTape() as tape: y_pred = self(images, training=True) - total_loss = self.compute_loss(images, y_true, y_pred, sample_weight) + total_loss = self.compute_loss( + images, y_true, 
y_pred, sample_weight + ) reg_losses = [] if self.weight_decay: for var in self.trainable_variables: if "bn" not in var.name: - reg_losses.append(self.weight_decay * tf.nn.l2_loss(var)) + reg_losses.append( + self.weight_decay * tf.nn.l2_loss(var) + ) l2_loss = tf.math.add_n(reg_losses) total_loss += l2_loss self.optimizer.minimize(total_loss, self.trainable_variables, tape=tape) - return self.compute_metrics(images, y_true, y_pred, sample_weight=sample_weight) + return self.compute_metrics( + images, y_true, y_pred, sample_weight=sample_weight + ) def get_config(self): return { @@ -261,7 +278,9 @@ def __init__( ) ) norm_name = "segmentation_head_norm_{}".format(i) - self._bn_layers.append(tf.keras.layers.BatchNormalization(name=norm_name)) + self._bn_layers.append( + tf.keras.layers.BatchNormalization(name=norm_name) + ) self._classification_layer = tf.keras.layers.Conv2D( name="segmentation_output", @@ -285,7 +304,9 @@ def call(self, inputs): lowest level of feature output as the input for the head. """ if not isinstance(inputs, dict): - raise ValueError(f"Expect the inputs to be a dict, but received {inputs}") + raise ValueError( + f"Expect the inputs to be a dict, but received {inputs}" + ) lowest_level = next(iter(sorted(inputs))) x = inputs[lowest_level] diff --git a/keras_cv/models/segmentation/deeplab_test.py b/keras_cv/models/segmentation/deeplab_test.py index daba76240c..d9f79d0916 100644 --- a/keras_cv/models/segmentation/deeplab_test.py +++ b/keras_cv/models/segmentation/deeplab_test.py @@ -45,9 +45,12 @@ def test_greyscale_input(self): def test_missing_input_shapes(self): with self.assertRaisesRegex( - ValueError, "Input shapes for both the backbone and DeepLabV3 are `None`." + ValueError, + "Input shapes for both the backbone and DeepLabV3 are `None`.", ): - backbone = models.ResNet50V2(include_rescaling=True, include_top=False) + backbone = models.ResNet50V2( + include_rescaling=True, include_top=False + ) segmentation.DeepLabV3(classes=11, backbone=backbone) def test_deeplab_model_with_components(self): @@ -104,7 +107,9 @@ def test_model_train(self): features = tfds.features.FeaturesDict( { "bbox": tfds.features.BBoxFeature(), - "image": tfds.features.Image(shape=(None, None, 3), dtype=tf.uint8), + "image": tfds.features.Image( + shape=(None, None, 3), dtype=tf.uint8 + ), "image/filename": tfds.features.Text(), "label": tfds.features.ClassLabel(num_classes=200), "label_name": tfds.features.Text(), @@ -126,11 +131,15 @@ def test_model_train(self): output_res = [96, 96] num_images = 11788 - image_resizing = tf.keras.layers.Resizing(target_size[1], target_size[0]) + image_resizing = tf.keras.layers.Resizing( + target_size[1], target_size[0] + ) labels_resizing = tf.keras.layers.Resizing(output_res[1], output_res[0]) def resize_images_and_masks(data): - image = tf.image.convert_image_dtype(data["image"], dtype=tf.float32) + image = tf.image.convert_image_dtype( + data["image"], dtype=tf.float32 + ) data["image"] = image_resizing(image) # WARNING: assumes processing unbatched mask = data["segmentation_mask"] @@ -159,7 +168,9 @@ def keep_image_and_mask_only(data): ) model.fit( - training_dataset, epochs=epochs, steps_per_epoch=num_images // batch_size + training_dataset, + epochs=epochs, + steps_per_epoch=num_images // batch_size, ) diff --git a/keras_cv/models/stable_diffusion/clip_tokenizer.py b/keras_cv/models/stable_diffusion/clip_tokenizer.py index 326416b4ab..0f0fa26ae6 100644 --- a/keras_cv/models/stable_diffusion/clip_tokenizer.py +++ 
b/keras_cv/models/stable_diffusion/clip_tokenizer.py @@ -151,7 +151,9 @@ def bpe(self, token): return token + "" while True: - bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf"))) + bigram = min( + pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf")) + ) if bigram not in self.bpe_ranks: break first, second = bigram @@ -166,7 +168,11 @@ def bpe(self, token): new_word.extend(word[i:]) break - if word[i] == first and i < len(word) - 1 and word[i + 1] == second: + if ( + word[i] == first + and i < len(word) - 1 + and word[i + 1] == second + ): new_word.append(first + second) i += 2 else: @@ -188,7 +194,8 @@ def encode(self, text): for token in re.findall(self.pat, text): token = "".join(self.byte_encoder[b] for b in token.encode("utf-8")) bpe_tokens.extend( - self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" ") + self.encoder[bpe_token] + for bpe_token in self.bpe(token).split(" ") ) return [self.start_of_text] + bpe_tokens + [self.end_of_text] diff --git a/keras_cv/models/stable_diffusion/diffusion_model.py b/keras_cv/models/stable_diffusion/diffusion_model.py index 15ece47c4e..9c01f1c27b 100644 --- a/keras_cv/models/stable_diffusion/diffusion_model.py +++ b/keras_cv/models/stable_diffusion/diffusion_model.py @@ -22,7 +22,12 @@ class DiffusionModel(keras.Model): def __init__( - self, img_height, img_width, max_text_length, name=None, download_weights=True + self, + img_height, + img_width, + max_text_length, + name=None, + download_weights=True, ): context = keras.layers.Input((max_text_length, 768)) t_embed_input = keras.layers.Input((320,)) @@ -111,7 +116,12 @@ def __init__( class DiffusionModelV2(keras.Model): def __init__( - self, img_height, img_width, max_text_length, name=None, download_weights=True + self, + img_height, + img_width, + max_text_length, + name=None, + download_weights=True, ): context = keras.layers.Input((max_text_length, 1024)) t_embed_input = keras.layers.Input((320,)) @@ -245,7 +255,9 @@ def __init__(self, num_heads, head_size, fully_connected=False, **kwargs): self.proj1 = keras.layers.Dense(num_heads * head_size) else: self.proj1 = PaddedConv2D(num_heads * head_size, 1) - self.transformer_block = BasicTransformerBlock(channels, num_heads, head_size) + self.transformer_block = BasicTransformerBlock( + channels, num_heads, head_size + ) if fully_connected: self.proj2 = keras.layers.Dense(channels) else: @@ -296,18 +308,28 @@ def call(self, inputs): context = inputs if context is None else context q, k, v = self.to_q(inputs), self.to_k(context), self.to_v(context) q = tf.reshape(q, (-1, inputs.shape[1], self.num_heads, self.head_size)) - k = tf.reshape(k, (-1, context.shape[1], self.num_heads, self.head_size)) - v = tf.reshape(v, (-1, context.shape[1], self.num_heads, self.head_size)) + k = tf.reshape( + k, (-1, context.shape[1], self.num_heads, self.head_size) + ) + v = tf.reshape( + v, (-1, context.shape[1], self.num_heads, self.head_size) + ) q = tf.transpose(q, (0, 2, 1, 3)) # (bs, num_heads, time, head_size) k = tf.transpose(k, (0, 2, 3, 1)) # (bs, num_heads, head_size, time) v = tf.transpose(v, (0, 2, 1, 3)) # (bs, num_heads, time, head_size) score = td_dot(q, k) * self.scale - weights = keras.activations.softmax(score) # (bs, num_heads, time, time) + weights = keras.activations.softmax( + score + ) # (bs, num_heads, time, time) attn = td_dot(weights, v) - attn = tf.transpose(attn, (0, 2, 1, 3)) # (bs, time, num_heads, head_size) - out = tf.reshape(attn, (-1, inputs.shape[1], self.num_heads * self.head_size)) + 
attn = tf.transpose( + attn, (0, 2, 1, 3) + ) # (bs, time, num_heads, head_size) + out = tf.reshape( + attn, (-1, inputs.shape[1], self.num_heads * self.head_size) + ) return self.out_proj(out) diff --git a/keras_cv/models/stable_diffusion/noise_scheduler.py b/keras_cv/models/stable_diffusion/noise_scheduler.py index d4566770de..ba78f169e0 100644 --- a/keras_cv/models/stable_diffusion/noise_scheduler.py +++ b/keras_cv/models/stable_diffusion/noise_scheduler.py @@ -53,7 +53,8 @@ def __init__( elif beta_schedule == "scaled_linear": # this schedule is very specific to the latent diffusion model. self.betas = ( - tf.linspace(beta_start**0.5, beta_end**0.5, train_timesteps) ** 2 + tf.linspace(beta_start**0.5, beta_end**0.5, train_timesteps) + ** 2 ) else: raise ValueError(f"Invalid beta schedule: {beta_schedule}.") @@ -66,9 +67,13 @@ def __init__( def _get_variance(self, timestep, predicted_variance=None): alpha_prod = self.alphas_cumprod[timestep] - alpha_prod_prev = self.alphas_cumprod[timestep - 1] if timestep > 0 else 1.0 + alpha_prod_prev = ( + self.alphas_cumprod[timestep - 1] if timestep > 0 else 1.0 + ) - variance = (1 - alpha_prod_prev) / (1 - alpha_prod) * self.betas[timestep] + variance = ( + (1 - alpha_prod_prev) / (1 - alpha_prod) * self.betas[timestep] + ) if self.variance_type == "fixed_small": variance = tf.clip_by_value( @@ -76,7 +81,11 @@ def _get_variance(self, timestep, predicted_variance=None): ) elif self.variance_type == "fixed_small_log": variance = tf.log( - (tf.clip_by_value(variance, clip_value_min=1e-20, clip_value_max=1)) + ( + tf.clip_by_value( + variance, clip_value_min=1e-20, clip_value_max=1 + ) + ) ) elif self.variance_type == "fixed_large": variance = self.betas[timestep] @@ -113,7 +122,9 @@ def step( The predicted sample at the previous timestep """ - if model_output.shape[1] == sample.shape[1] * 2 and self.variance_type in [ + if model_output.shape[1] == sample.shape[ + 1 + ] * 2 and self.variance_type in [ "learned", "learned_range", ]: @@ -125,7 +136,9 @@ def step( # 1. 
compute alphas, betas alpha_prod = self.alphas_cumprod[timestep] - alpha_prod_prev = self.alphas_cumprod[timestep - 1] if timestep > 0 else 1.0 + alpha_prod_prev = ( + self.alphas_cumprod[timestep - 1] if timestep > 0 else 1.0 + ) beta_prod = 1 - alpha_prod beta_prod_prev = 1 - alpha_prod_prev @@ -163,7 +176,9 @@ def step( if timestep > 0: noise = tf.random.normal(model_output.shape) variance = ( - self._get_variance(timestep, predicted_variance=predicted_variance) + self._get_variance( + timestep, predicted_variance=predicted_variance + ) ** 0.5 ) * noise @@ -189,7 +204,8 @@ def add_noise( ) noisy_samples = ( - sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise + sqrt_alpha_prod * original_samples + + sqrt_one_minus_alpha_prod * noise ) return noisy_samples diff --git a/keras_cv/models/stable_diffusion/stable_diffusion.py b/keras_cv/models/stable_diffusion/stable_diffusion.py index d21fb62110..3b71b40b26 100644 --- a/keras_cv/models/stable_diffusion/stable_diffusion.py +++ b/keras_cv/models/stable_diffusion/stable_diffusion.py @@ -115,7 +115,9 @@ def encode_text(self, prompt): phrase = inputs + [49407] * (MAX_PROMPT_LENGTH - len(inputs)) phrase = tf.convert_to_tensor([phrase], dtype=tf.int32) - context = self.text_encoder.predict_on_batch([phrase, self._get_pos_ids()]) + context = self.text_encoder.predict_on_batch( + [phrase, self._get_pos_ids()] + ) return context @@ -212,13 +214,19 @@ def generate_image( unconditional_latent = self.diffusion_model.predict_on_batch( [latent, t_emb, unconditional_context] ) - latent = self.diffusion_model.predict_on_batch([latent, t_emb, context]) + latent = self.diffusion_model.predict_on_batch( + [latent, t_emb, context] + ) latent = unconditional_latent + unconditional_guidance_scale * ( latent - unconditional_latent ) a_t, a_prev = alphas[index], alphas_prev[index] - pred_x0 = (latent_prev - math.sqrt(1 - a_t) * latent) / math.sqrt(a_t) - latent = latent * math.sqrt(1.0 - a_prev) + math.sqrt(a_prev) * pred_x0 + pred_x0 = (latent_prev - math.sqrt(1 - a_t) * latent) / math.sqrt( + a_t + ) + latent = ( + latent * math.sqrt(1.0 - a_prev) + math.sqrt(a_prev) * pred_x0 + ) iteration += 1 progbar.update(iteration) @@ -343,12 +351,19 @@ def inpaint( unconditional_latent = self.diffusion_model.predict_on_batch( [latent, t_emb, unconditional_context] ) - latent = self.diffusion_model.predict_on_batch([latent, t_emb, context]) + latent = self.diffusion_model.predict_on_batch( + [latent, t_emb, context] + ) latent = unconditional_latent + unconditional_guidance_scale * ( latent - unconditional_latent ) - pred_x0 = (latent_prev - math.sqrt(1 - a_t) * latent) / math.sqrt(a_t) - latent = latent * math.sqrt(1.0 - a_prev) + math.sqrt(a_prev) * pred_x0 + pred_x0 = ( + latent_prev - math.sqrt(1 - a_t) * latent + ) / math.sqrt(a_t) + latent = ( + latent * math.sqrt(1.0 - a_prev) + + math.sqrt(a_prev) * pred_x0 + ) # Use known image (x0) to compute latent if timestep > 1: @@ -443,7 +458,9 @@ def tokenizer(self): self._tokenizer = SimpleTokenizer() return self._tokenizer - def _get_timestep_embedding(self, timestep, batch_size, dim=320, max_period=10000): + def _get_timestep_embedding( + self, timestep, batch_size, dim=320, max_period=10000 + ): half = dim // 2 freqs = tf.math.exp( -math.log(max_period) * tf.range(0, half, dtype=tf.float32) / half @@ -472,7 +489,9 @@ def _get_initial_diffusion_noise(self, batch_size, seed): @staticmethod def _get_pos_ids(): - return tf.convert_to_tensor([list(range(MAX_PROMPT_LENGTH))], dtype=tf.int32) + return 
tf.convert_to_tensor( + [list(range(MAX_PROMPT_LENGTH))], dtype=tf.int32 + ) class StableDiffusion(StableDiffusionBase): diff --git a/keras_cv/models/stable_diffusion/stable_diffusion_test.py b/keras_cv/models/stable_diffusion/stable_diffusion_test.py index f0566e04d7..809d100ca6 100644 --- a/keras_cv/models/stable_diffusion/stable_diffusion_test.py +++ b/keras_cv/models/stable_diffusion/stable_diffusion_test.py @@ -24,7 +24,9 @@ def DISABLED_test_end_to_end_golden_value(self): stablediff = StableDiffusion(128, 128) img = stablediff.text_to_image(prompt, seed=1337) - self.assertAllClose(img[0][13:14, 13:14, :][0][0], [15, 248, 229], atol=1e-4) + self.assertAllClose( + img[0][13:14, 13:14, :][0][0], [15, 248, 229], atol=1e-4 + ) # Verify that the step-by-step creation flow creates an identical output text_encoding = stablediff.encode_text(prompt) @@ -41,7 +43,8 @@ def DISABLED_test_generate_image_rejects_noise_and_seed(self): stablediff = StableDiffusion(128, 128) with self.assertRaisesRegex( - ValueError, r"`diffusion_noise` and `seed` should not both be passed" + ValueError, + r"`diffusion_noise` and `seed` should not both be passed", ): _ = stablediff.generate_image( stablediff.encode_text("thou shall not render"), diff --git a/keras_cv/models/stable_diffusion/text_encoder.py b/keras_cv/models/stable_diffusion/text_encoder.py index 8e958fedf0..b458ce6043 100644 --- a/keras_cv/models/stable_diffusion/text_encoder.py +++ b/keras_cv/models/stable_diffusion/text_encoder.py @@ -18,8 +18,12 @@ class TextEncoder(keras.Model): - def __init__(self, max_length, vocab_size=49408, name=None, download_weights=True): - tokens = keras.layers.Input(shape=(max_length,), dtype="int32", name="tokens") + def __init__( + self, max_length, vocab_size=49408, name=None, download_weights=True + ): + tokens = keras.layers.Input( + shape=(max_length,), dtype="int32", name="tokens" + ) positions = keras.layers.Input( shape=(max_length,), dtype="int32", name="positions" ) @@ -38,8 +42,12 @@ def __init__(self, max_length, vocab_size=49408, name=None, download_weights=Tru class TextEncoderV2(keras.Model): - def __init__(self, max_length, vocab_size=49408, name=None, download_weights=True): - tokens = keras.layers.Input(shape=(max_length,), dtype="int32", name="tokens") + def __init__( + self, max_length, vocab_size=49408, name=None, download_weights=True + ): + tokens = keras.layers.Input( + shape=(max_length,), dtype="int32", name="tokens" + ) positions = keras.layers.Input( shape=(max_length,), dtype="int32", name="positions" ) @@ -62,7 +70,9 @@ def quick_gelu(x): class CLIPEmbedding(keras.layers.Layer): - def __init__(self, input_dim=49408, output_dim=768, max_length=77, **kwargs): + def __init__( + self, input_dim=49408, output_dim=768, max_length=77, **kwargs + ): super().__init__(**kwargs) self.token_embedding = keras.layers.Embedding(input_dim, output_dim) self.position_embedding = keras.layers.Embedding(max_length, output_dim) @@ -111,14 +121,19 @@ def __init__(self, embed_dim=768, num_heads=12, causal=True, **kwargs): self.out_proj = keras.layers.Dense(self.embed_dim) def reshape_states(self, x, sequence_length, batch_size): - x = tf.reshape(x, (batch_size, sequence_length, self.num_heads, self.head_dim)) - return tf.transpose(x, (0, 2, 1, 3)) # bs, heads, sequence_length, head_dim + x = tf.reshape( + x, (batch_size, sequence_length, self.num_heads, self.head_dim) + ) + return tf.transpose( + x, (0, 2, 1, 3) + ) # bs, heads, sequence_length, head_dim def call(self, inputs, attention_mask=None): if 
attention_mask is None and self.causal: length = tf.shape(inputs)[1] attention_mask = tfnp.triu( - tf.ones((1, 1, length, length), dtype=self.compute_dtype) * -tfnp.inf, + tf.ones((1, 1, length, length), dtype=self.compute_dtype) + * -tfnp.inf, k=1, ) @@ -136,7 +151,9 @@ def call(self, inputs, attention_mask=None): value_states = tf.reshape(value_states, proj_shape) attn_weights = query_states @ tf.transpose(key_states, (0, 2, 1)) - attn_weights = tf.reshape(attn_weights, (-1, self.num_heads, tgt_len, src_len)) + attn_weights = tf.reshape( + attn_weights, (-1, self.num_heads, tgt_len, src_len) + ) attn_weights = attn_weights + attention_mask attn_weights = tf.reshape(attn_weights, (-1, tgt_len, src_len)) diff --git a/keras_cv/models/utils_test.py b/keras_cv/models/utils_test.py index df6cf7a217..606b5e1a9d 100644 --- a/keras_cv/models/utils_test.py +++ b/keras_cv/models/utils_test.py @@ -27,7 +27,9 @@ def test_parse_model_inputs(self): self.assertEqual(inputs.shape.as_list(), list((None,) + input_shape)) input_tensor = layers.Input(shape=input_shape) - self.assertIs(utils.parse_model_inputs(input_shape, input_tensor), input_tensor) + self.assertIs( + utils.parse_model_inputs(input_shape, input_tensor), input_tensor + ) def test_as_backbone_missing_backbone_level_outputs(self): model = tf.keras.models.Sequential() diff --git a/keras_cv/models/vgg16.py b/keras_cv/models/vgg16.py index 4c3ad2e1aa..0f5f2a9656 100644 --- a/keras_cv/models/vgg16.py +++ b/keras_cv/models/vgg16.py @@ -149,9 +149,9 @@ def VGG16( x = layers.Flatten(name="flatten")(x) x = layers.Dense(4096, activation="relu", name="fc1")(x) x = layers.Dense(4096, activation="relu", name="fc2")(x) - x = layers.Dense(classes, activation=classifier_activation, name="predictions")( - x - ) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) else: if pooling == "avg": x = layers.GlobalAveragePooling2D()(x) diff --git a/keras_cv/models/vgg19.py b/keras_cv/models/vgg19.py index 8df3573c05..287601b5e4 100644 --- a/keras_cv/models/vgg19.py +++ b/keras_cv/models/vgg19.py @@ -163,9 +163,9 @@ def VGG19( x = layers.Flatten(name="flatten")(x) x = layers.Dense(4096, activation="relu", name="fc1")(x) x = layers.Dense(4096, activation="relu", name="fc2")(x) - x = layers.Dense(classes, activation=classifier_activation, name="predictions")( - x - ) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) else: if pooling == "avg": x = layers.GlobalAveragePooling2D()(x) diff --git a/keras_cv/models/vit.py b/keras_cv/models/vit.py index a66993bd0b..182aff3112 100644 --- a/keras_cv/models/vit.py +++ b/keras_cv/models/vit.py @@ -335,7 +335,9 @@ def ViTTiny16( pooling=pooling, classes=classes, patch_size=MODEL_CONFIGS["ViTTiny16"]["patch_size"], - transformer_layer_num=MODEL_CONFIGS["ViTTiny16"]["transformer_layer_num"], + transformer_layer_num=MODEL_CONFIGS["ViTTiny16"][ + "transformer_layer_num" + ], project_dim=MODEL_CONFIGS["ViTTiny16"]["project_dim"], mlp_dim=MODEL_CONFIGS["ViTTiny16"]["mlp_dim"], num_heads=MODEL_CONFIGS["ViTTiny16"]["num_heads"], @@ -525,7 +527,9 @@ def ViTTiny32( pooling=pooling, classes=classes, patch_size=MODEL_CONFIGS["ViTTiny32"]["patch_size"], - transformer_layer_num=MODEL_CONFIGS["ViTTiny32"]["transformer_layer_num"], + transformer_layer_num=MODEL_CONFIGS["ViTTiny32"][ + "transformer_layer_num" + ], project_dim=MODEL_CONFIGS["ViTTiny32"]["project_dim"], mlp_dim=MODEL_CONFIGS["ViTTiny32"]["mlp_dim"], num_heads=MODEL_CONFIGS["ViTTiny32"]["num_heads"], diff 
--git a/keras_cv/ops/iou_3d_test.py b/keras_cv/ops/iou_3d_test.py index da304582ca..0a3208ec2b 100644 --- a/keras_cv/ops/iou_3d_test.py +++ b/keras_cv/ops/iou_3d_test.py @@ -24,7 +24,8 @@ class IoU3DTest(tf.test.TestCase): @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def testOpCall(self): diff --git a/keras_cv/point_cloud/point_cloud.py b/keras_cv/point_cloud/point_cloud.py index 915a819a38..246ce38a7d 100644 --- a/keras_cv/point_cloud/point_cloud.py +++ b/keras_cv/point_cloud/point_cloud.py @@ -44,7 +44,9 @@ def within_box3d_index(points, boxes): results = [] for i in range(num_samples): results.append( - custom_ops.ops.kcv_within_box(points[i], boxes[i])[tf.newaxis, ...] + custom_ops.ops.kcv_within_box(points[i], boxes[i])[ + tf.newaxis, ... + ] ) return tf.concat(results, axis=0) else: @@ -297,7 +299,8 @@ def _box_area(boxes): boxes_roll = tf.roll(boxes, shift=1, axis=-2) det = ( tf.reduce_sum( - boxes[..., 0] * boxes_roll[..., 1] - boxes[..., 1] * boxes_roll[..., 0], + boxes[..., 0] * boxes_roll[..., 1] + - boxes[..., 1] * boxes_roll[..., 0], axis=-1, keepdims=True, ) @@ -327,10 +330,12 @@ def is_within_box2d(points, boxes): ) is_inside = tf.math.logical_and( tf.math.logical_and( - _is_on_lefthand_side(points, v1, v2), _is_on_lefthand_side(points, v2, v3) + _is_on_lefthand_side(points, v1, v2), + _is_on_lefthand_side(points, v2, v3), ), tf.math.logical_and( - _is_on_lefthand_side(points, v3, v4), _is_on_lefthand_side(points, v4, v1) + _is_on_lefthand_side(points, v3, v4), + _is_on_lefthand_side(points, v4, v1), ), ) valid_area = tf.greater(_box_area(boxes), 0) @@ -433,7 +438,9 @@ def coordinate_transform(points, pose): rotation_matrix = _get_3d_rotation_matrix(yaw, roll, pitch) # Finally, rotate the points about the pose's origin according to the # rotation matrix. - rotated_points = tf.einsum("...i,...ij->...j", translated_points, rotation_matrix) + rotated_points = tf.einsum( + "...i,...ij->...j", translated_points, rotation_matrix + ) return rotated_points @@ -476,7 +483,9 @@ def within_a_frustum(points, center, r_distance, theta_width, phi_width): points are within the frustum. 
""" - r, theta, phi = tf.unstack(spherical_coordinate_transform(points[:, :3]), axis=-1) + r, theta, phi = tf.unstack( + spherical_coordinate_transform(points[:, :3]), axis=-1 + ) _, center_theta, center_phi = tf.unstack( spherical_coordinate_transform(center[tf.newaxis, :]), axis=-1 diff --git a/keras_cv/point_cloud/point_cloud_test.py b/keras_cv/point_cloud/point_cloud_test.py index f1bc7a7d3a..dc0e49bbbb 100644 --- a/keras_cv/point_cloud/point_cloud_test.py +++ b/keras_cv/point_cloud/point_cloud_test.py @@ -24,7 +24,9 @@ class AngleTest(tf.test.TestCase): def test_wrap_angle_radians(self): - self.assertAllClose(-np.pi + 0.1, point_cloud.wrap_angle_radians(np.pi + 0.1)) + self.assertAllClose( + -np.pi + 0.1, point_cloud.wrap_angle_radians(np.pi + 0.1) + ) self.assertAllClose(0.0, point_cloud.wrap_angle_radians(2 * np.pi)) @@ -33,7 +35,10 @@ def test_convert_center_to_corners(self): boxes = tf.constant( [ [[1, 2, 3, 4, 3, 6, 0], [1, 2, 3, 4, 3, 6, 0]], - [[1, 2, 3, 4, 3, 6, np.pi / 2.0], [1, 2, 3, 4, 3, 6, np.pi / 2.0]], + [ + [1, 2, 3, 4, 3, 6, np.pi / 2.0], + [1, 2, 3, 4, 3, 6, np.pi / 2.0], + ], ] ) corners = point_cloud._center_xyzWHD_to_corner_xyz(boxes) @@ -212,7 +217,9 @@ def test_within_box3d(self): points = tf.tile(points[tf.newaxis, ...], [batch_size, 1, 1]) bboxes = tf.tile(bboxes[tf.newaxis, ...], [batch_size, 1, 1]) is_inside = point_cloud.is_within_box3d(points, bboxes) - self.assertAllEqual([batch_size, num_points, num_boxes], is_inside.shape) + self.assertAllEqual( + [batch_size, num_points, num_boxes], is_inside.shape + ) for batch_idx in range(batch_size): self.assertAllEqual(expected_is_inside, is_inside[batch_idx]) @@ -254,7 +261,9 @@ def testCoordinateTransform(self): def testSphericalCoordinatesTransform(self): np_xyz = np.random.randn(5, 6, 3) points = tf.constant(np_xyz, dtype=tf.float32) - spherical_coordinates = point_cloud.spherical_coordinate_transform(points) + spherical_coordinates = point_cloud.spherical_coordinate_transform( + points + ) # Convert coordinates back to xyz to verify. dist = spherical_coordinates[..., 0] @@ -270,7 +279,8 @@ def testSphericalCoordinatesTransform(self): self.assertAllClose(z, np_xyz[..., 2]) @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_group_points(self): @@ -306,7 +316,11 @@ def test_group_points(self): [5.0, 7.0, 8.0], # none [1.0, 5.0, 3.6], # box0, box1 [-11.6, -10.0, -10.0], # box3 (rotated corner point). - [-11.4, -11.4, -10.0], # not in box3, would be if not rotated. + [ + -11.4, + -11.4, + -10.0, + ], # not in box3, would be if not rotated. 
], dtype=tf.float32, ) @@ -318,7 +332,9 @@ def test_group_points(self): def testWithinAFrustum(self): center = tf.constant([1.0, 1.0, 1.0]) - points = tf.constant([[0.0, 0.0, 0.0], [1.0, 2.0, 1.0], [1.0, 0.0, 1.0]]) + points = tf.constant( + [[0.0, 0.0, 0.0], [1.0, 2.0, 1.0], [1.0, 0.0, 1.0]] + ) point_mask = point_cloud.within_a_frustum( points, center, r_distance=1.0, theta_width=1.0, phi_width=1.0 diff --git a/keras_cv/point_cloud/within_box_3d_test.py b/keras_cv/point_cloud/within_box_3d_test.py index 63f89254f6..ac4c084b90 100644 --- a/keras_cv/point_cloud/within_box_3d_test.py +++ b/keras_cv/point_cloud/within_box_3d_test.py @@ -33,10 +33,16 @@ def get_points_boxes(): points_z = 5.0 * tf.ones(shape=[num_points, 1], dtype=tf.float32) points = tf.concat([points, points_z], axis=-1) boxes_x = tf.random.uniform( - shape=[num_boxes, 1], minval=0, maxval=box_dimension - 1.0, dtype=tf.float32 + shape=[num_boxes, 1], + minval=0, + maxval=box_dimension - 1.0, + dtype=tf.float32, ) boxes_y = tf.random.uniform( - shape=[num_boxes, 1], minval=0, maxval=box_dimension - 1.0, dtype=tf.float32 + shape=[num_boxes, 1], + minval=0, + maxval=box_dimension - 1.0, + dtype=tf.float32, ) boxes_dx = tf.random.uniform( shape=[num_boxes, 1], minval=0, maxval=5.0, dtype=tf.float32 @@ -50,14 +56,16 @@ def get_points_boxes(): boxes_dz = 3.0 * tf.ones([num_boxes, 1], dtype=tf.float32) boxes_angle = tf.zeros([num_boxes, 1], dtype=tf.float32) boxes = tf.concat( - [boxes_x, boxes_y, boxes_z, boxes_dx, boxes_dy, boxes_dz, boxes_angle], axis=-1 + [boxes_x, boxes_y, boxes_z, boxes_dx, boxes_dy, boxes_dz, boxes_angle], + axis=-1, ) return points, boxes class WithinBox3DTest(tf.test.TestCase): @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_unbatched_unrotated(self): @@ -85,7 +93,8 @@ def test_unbatched_unrotated(self): self.assertAllEqual([0, 0, -1, 0, -1, 1, -1], res) @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_unbatched_rotated(self): @@ -111,7 +120,8 @@ def test_unbatched_rotated(self): self.assertAllClose([0, 0, -1, 0, -1], res) @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_batched_unrotated(self): @@ -144,7 +154,8 @@ def test_batched_unrotated(self): ) @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_batched_rotated(self): @@ -174,7 +185,8 @@ def test_batched_rotated(self): self.assertAllEqual([[0, 0, -1, 0, -1], [-1, -1, -1, -1, -1]], res) @pytest.mark.skipif( - "TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_many_points(self): @@ -185,16 +197,23 @@ def test_many_points(self): self.assertAllClose(res.shape, points.shape[:1]) @pytest.mark.skipif( - 
"TEST_CUSTOM_OPS" not in os.environ or os.environ["TEST_CUSTOM_OPS"] != "true", + "TEST_CUSTOM_OPS" not in os.environ + or os.environ["TEST_CUSTOM_OPS"] != "true", reason="Requires binaries compiled from source", ) def test_equal(self): for _ in range(10000): with tf.device("cpu:0"): - box_center = tf.random.uniform(shape=[1, 3], minval=-10.0, maxval=10.0) - box_dim = tf.random.uniform(shape=[1, 3], minval=0.1, maxval=10.0) + box_center = tf.random.uniform( + shape=[1, 3], minval=-10.0, maxval=10.0 + ) + box_dim = tf.random.uniform( + shape=[1, 3], minval=0.1, maxval=10.0 + ) boxes = tf.concat([box_center, box_dim, [[0.0]]], axis=-1) points = tf.random.normal([32, 3]) res = keras_cv.point_cloud.is_within_any_box3d(points, boxes) - res_v2 = keras_cv.point_cloud.is_within_any_box3d_v2(points, boxes) + res_v2 = keras_cv.point_cloud.is_within_any_box3d_v2( + points, boxes + ) self.assertAllEqual(res, res_v2) diff --git a/keras_cv/tools/checkpoint_conversion/ViT_weight_conversion.ipynb b/keras_cv/tools/checkpoint_conversion/ViT_weight_conversion.ipynb index cfe0a926e5..20b83ed1b7 100644 --- a/keras_cv/tools/checkpoint_conversion/ViT_weight_conversion.ipynb +++ b/keras_cv/tools/checkpoint_conversion/ViT_weight_conversion.ipynb @@ -1367,7 +1367,9 @@ "model.layers[1].weights[1].assign(tf.Variable(params_jax[\"embedding/kernel\"]))\n", "model.layers[1].weights[2].assign(tf.Variable(params_jax[\"embedding/bias\"]))\n", "model.layers[1].weights[3].assign(\n", - " tf.Variable(params_jax[\"Transformer/posembed_input/pos_embedding\"].squeeze())\n", + " tf.Variable(\n", + " params_jax[\"Transformer/posembed_input/pos_embedding\"].squeeze()\n", + " )\n", ")" ] }, @@ -1487,16 +1489,24 @@ "# Copy Transformer Encoders\n", "for i in range(model_to_convert[1][1]):\n", " model.layers[3 + i].weights[0].assign(\n", - " tf.Variable(params_jax[f\"Transformer/encoderblock_{i}/LayerNorm_0/scale\"])\n", + " tf.Variable(\n", + " params_jax[f\"Transformer/encoderblock_{i}/LayerNorm_0/scale\"]\n", + " )\n", " )\n", " model.layers[3 + i].weights[1].assign(\n", - " tf.Variable(params_jax[f\"Transformer/encoderblock_{i}/LayerNorm_0/bias\"])\n", + " tf.Variable(\n", + " params_jax[f\"Transformer/encoderblock_{i}/LayerNorm_0/bias\"]\n", + " )\n", " )\n", " model.layers[3 + i].weights[2].assign(\n", - " tf.Variable(params_jax[f\"Transformer/encoderblock_{i}/LayerNorm_2/scale\"])\n", + " tf.Variable(\n", + " params_jax[f\"Transformer/encoderblock_{i}/LayerNorm_2/scale\"]\n", + " )\n", " )\n", " model.layers[3 + i].weights[3].assign(\n", - " tf.Variable(params_jax[f\"Transformer/encoderblock_{i}/LayerNorm_2/bias\"])\n", + " tf.Variable(\n", + " params_jax[f\"Transformer/encoderblock_{i}/LayerNorm_2/bias\"]\n", + " )\n", " )\n", " model.layers[3 + i].weights[4].assign(\n", " tf.Variable(\n", @@ -1556,19 +1566,27 @@ " )\n", " model.layers[3 + i].weights[12].assign(\n", " tf.Variable(\n", - " params_jax[f\"Transformer/encoderblock_{i}/MlpBlock_3/Dense_0/kernel\"]\n", + " params_jax[\n", + " f\"Transformer/encoderblock_{i}/MlpBlock_3/Dense_0/kernel\"\n", + " ]\n", " )\n", " )\n", " model.layers[3 + i].weights[13].assign(\n", - " tf.Variable(params_jax[f\"Transformer/encoderblock_{i}/MlpBlock_3/Dense_0/bias\"])\n", + " tf.Variable(\n", + " params_jax[f\"Transformer/encoderblock_{i}/MlpBlock_3/Dense_0/bias\"]\n", + " )\n", " )\n", " model.layers[3 + i].weights[14].assign(\n", " tf.Variable(\n", - " params_jax[f\"Transformer/encoderblock_{i}/MlpBlock_3/Dense_1/kernel\"]\n", + " params_jax[\n", + " 
f\"Transformer/encoderblock_{i}/MlpBlock_3/Dense_1/kernel\"\n", + " ]\n", " )\n", " )\n", " model.layers[3 + i].weights[15].assign(\n", - " tf.Variable(params_jax[f\"Transformer/encoderblock_{i}/MlpBlock_3/Dense_1/bias\"])\n", + " tf.Variable(\n", + " params_jax[f\"Transformer/encoderblock_{i}/MlpBlock_3/Dense_1/bias\"]\n", + " )\n", " )" ] }, diff --git a/keras_cv/training/contrastive/contrastive_trainer.py b/keras_cv/training/contrastive/contrastive_trainer.py index 862222fdf4..47ce04c979 100644 --- a/keras_cv/training/contrastive/contrastive_trainer.py +++ b/keras_cv/training/contrastive/contrastive_trainer.py @@ -133,7 +133,9 @@ def compile( ) if self.probe and not probe_loss: - raise ValueError("`probe_loss` must be specified when a probe is included.") + raise ValueError( + "`probe_loss` must be specified when a probe is included." + ) if "loss" in kwargs: raise ValueError( @@ -177,7 +179,9 @@ def fit( x=x, y=y, sample_weight=sample_weight, batch_size=batch_size ) - dataset = dataset.map(self.run_augmenters, num_parallel_calls=tf.data.AUTOTUNE) + dataset = dataset.map( + self.run_augmenters, num_parallel_calls=tf.data.AUTOTUNE + ) dataset = dataset.prefetch(tf.data.AUTOTUNE) return super().fit(x=dataset, **kwargs) @@ -206,7 +210,9 @@ def train_step(self, data): projections_1 = self.projectors[1](features_1, training=True) loss = self.compiled_loss( - projections_0, projections_1, regularization_losses=self.encoder.losses + projections_0, + projections_1, + regularization_losses=self.encoder.losses, ) gradients = tape.gradient( @@ -228,9 +234,13 @@ def train_step(self, data): if self.probe: if labels is None: - raise ValueError("Targets must be provided when a probe is specified") + raise ValueError( + "Targets must be provided when a probe is specified" + ) with tf.GradientTape() as tape: - features = tf.stop_gradient(self.encoder(images, training=False)) + features = tf.stop_gradient( + self.encoder(images, training=False) + ) class_logits = self.probe(features, training=True) probe_loss = self.probe_loss(labels, class_logits) gradients = tape.gradient(probe_loss, self.probe.trainable_weights) diff --git a/keras_cv/training/contrastive/contrastive_trainer_test.py b/keras_cv/training/contrastive/contrastive_trainer_test.py index 6bcc9256f9..50c0f5a6ce 100644 --- a/keras_cv/training/contrastive/contrastive_trainer_test.py +++ b/keras_cv/training/contrastive/contrastive_trainer_test.py @@ -82,7 +82,9 @@ def test_train_with_probing(self): trainer_with_probing.compile( encoder_optimizer=optimizers.Adam(), encoder_loss=SimCLRLoss(temperature=0.5), - probe_metrics=[metrics.TopKCategoricalAccuracy(3, "top3_probe_accuracy")], + probe_metrics=[ + metrics.TopKCategoricalAccuracy(3, "top3_probe_accuracy") + ], probe_optimizer=optimizers.Adam(), probe_loss=keras.losses.CategoricalCrossentropy(from_logits=True), ) @@ -162,7 +164,9 @@ def build_augmenter(self): return preprocessing.RandomFlip("horizontal") def build_encoder(self): - return DenseNet121(include_rescaling=False, include_top=False, pooling="avg") + return DenseNet121( + include_rescaling=False, include_top=False, pooling="avg" + ) def build_projector(self): return layers.Dense(128) diff --git a/keras_cv/training/contrastive/simclr_trainer.py b/keras_cv/training/contrastive/simclr_trainer.py index 2c5e1db57d..5cf7c5da52 100644 --- a/keras_cv/training/contrastive/simclr_trainer.py +++ b/keras_cv/training/contrastive/simclr_trainer.py @@ -75,7 +75,8 @@ def __init__( aspect_ratio_factor=aspect_ratio_factor, ), preprocessing.MaybeApply( - 
preprocessing.Grayscale(output_channels=3), rate=grayscale_rate + preprocessing.Grayscale(output_channels=3), + rate=grayscale_rate, ), preprocessing.MaybeApply( preprocessing.RandomColorJitter( diff --git a/keras_cv/training/contrastive/simclr_trainer_test.py b/keras_cv/training/contrastive/simclr_trainer_test.py index 6baa829390..5ad0e5a180 100644 --- a/keras_cv/training/contrastive/simclr_trainer_test.py +++ b/keras_cv/training/contrastive/simclr_trainer_test.py @@ -37,4 +37,6 @@ def test_train_without_probing(self): simclr_without_probing.fit(images) def build_encoder(self): - return ResNet50V2(include_rescaling=False, include_top=False, pooling="avg") + return ResNet50V2( + include_rescaling=False, include_top=False, pooling="avg" + ) diff --git a/keras_cv/utils/__init__.py b/keras_cv/utils/__init__.py index 046db85c17..98373f5be5 100644 --- a/keras_cv/utils/__init__.py +++ b/keras_cv/utils/__init__.py @@ -13,7 +13,9 @@ # limitations under the License. from keras_cv.utils import test_utils -from keras_cv.utils.conditional_imports import assert_waymo_open_dataset_installed +from keras_cv.utils.conditional_imports import ( + assert_waymo_open_dataset_installed, +) from keras_cv.utils.fill_utils import fill_rectangle from keras_cv.utils.preprocessing import blend from keras_cv.utils.preprocessing import ensure_tensor diff --git a/keras_cv/utils/conv_utils.py b/keras_cv/utils/conv_utils.py index 0d8bab51fe..78600a6fcb 100644 --- a/keras_cv/utils/conv_utils.py +++ b/keras_cv/utils/conv_utils.py @@ -31,7 +31,8 @@ def normalize_tuple(value, n, name, allow_zero=False): passed. """ error_msg = ( - f"The `{name}` argument must be a tuple of {n} " f"integers. Received: {value}" + f"The `{name}` argument must be a tuple of {n} " + f"integers. Received: {value}" ) if isinstance(value, int): @@ -48,7 +49,8 @@ def normalize_tuple(value, n, name, allow_zero=False): int(single_value) except (ValueError, TypeError): error_msg += ( - f"including element {single_value} of " f"type {type(single_value)}" + f"including element {single_value} of " + f"type {type(single_value)}" ) raise ValueError(error_msg) diff --git a/keras_cv/utils/fill_utils.py b/keras_cv/utils/fill_utils.py index d5fe0aa646..694737bba5 100644 --- a/keras_cv/utils/fill_utils.py +++ b/keras_cv/utils/fill_utils.py @@ -24,7 +24,9 @@ def _axis_mask(starts, ends, mask_len): axis_indices = tf.tile(axis_indices, [batch_size, 1]) # mask of index bounds - axis_mask = tf.greater_equal(axis_indices, starts) & tf.less(axis_indices, ends) + axis_mask = tf.greater_equal(axis_indices, starts) & tf.less( + axis_indices, ends + ) return axis_mask @@ -72,7 +74,9 @@ def fill_rectangle(images, centers_x, centers_y, widths, heights, fill_values): xywh = tf.stack([centers_x, centers_y, widths, heights], axis=1) xywh = tf.cast(xywh, tf.float32) - corners = bounding_box.convert_format(xywh, source="center_xywh", target="xyxy") + corners = bounding_box.convert_format( + xywh, source="center_xywh", target="xyxy" + ) mask_shape = (images_width, images_height) is_rectangle = corners_to_mask(corners, mask_shape) is_rectangle = tf.expand_dims(is_rectangle, -1) diff --git a/keras_cv/utils/fill_utils_test.py b/keras_cv/utils/fill_utils_test.py index bb98910210..a22a8d6b80 100644 --- a/keras_cv/utils/fill_utils_test.py +++ b/keras_cv/utils/fill_utils_test.py @@ -311,7 +311,9 @@ def test_different_fill(self): centers_y = tf.fill([batch_size], cent_y) width = tf.fill([batch_size], rec_w) height = tf.fill([batch_size], rec_h) - fill = tf.stack([tf.fill(images[0].shape, 2), 
tf.fill(images[1].shape, 3)]) + fill = tf.stack( + [tf.fill(images[0].shape, 2), tf.fill(images[1].shape, 3)] + ) filled_images = fill_utils.fill_rectangle( images, centers_x, centers_y, width, height, fill diff --git a/keras_cv/utils/preprocessing.py b/keras_cv/utils/preprocessing.py index 5535d0d0d3..be1369e787 100644 --- a/keras_cv/utils/preprocessing.py +++ b/keras_cv/utils/preprocessing.py @@ -48,7 +48,9 @@ def get_interpolation(interpolation): return _TF_INTERPOLATION_METHODS[interpolation] -def transform_value_range(images, original_range, target_range, dtype=tf.float32): +def transform_value_range( + images, original_range, target_range, dtype=tf.float32 +): """transforms values in input tensor from original_range to target_range. This function is intended to be used in preprocessing layers that rely upon color values. This allows us to assume internally that @@ -80,17 +82,24 @@ def transform_value_range(images, original_range, target_range, dtype=tf.float32 ) ``` """ - if original_range[0] == target_range[0] and original_range[1] == target_range[1]: + if ( + original_range[0] == target_range[0] + and original_range[1] == target_range[1] + ): return images images = tf.cast(images, dtype=dtype) original_min_value, original_max_value = _unwrap_value_range( original_range, dtype=dtype ) - target_min_value, target_max_value = _unwrap_value_range(target_range, dtype=dtype) + target_min_value, target_max_value = _unwrap_value_range( + target_range, dtype=dtype + ) # images in the [0, 1] scale - images = (images - original_min_value) / (original_max_value - original_min_value) + images = (images - original_min_value) / ( + original_max_value - original_min_value + ) scale_factor = target_max_value - target_min_value return (images * scale_factor) + target_min_value @@ -125,7 +134,9 @@ def blend(image1: tf.Tensor, image2: tf.Tensor, factor: float) -> tf.Tensor: return tf.clip_by_value(temp, 0.0, 255.0) -def parse_factor(param, min_value=0.0, max_value=1.0, param_name="factor", seed=None): +def parse_factor( + param, min_value=0.0, max_value=1.0, param_name="factor", seed=None +): if isinstance(param, dict): # For all classes missing a `from_config` implementation. # (RandomHue, RandomShear, etc.) 
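Note: the hunks above only re-wrap calls to the `transform_value_range` helper, which maps pixel values from one range to another. A minimal usage sketch, not part of this patch (the import path follows keras_cv/utils/preprocessing.py as shown in this diff):

    import tensorflow as tf

    from keras_cv.utils import preprocessing

    # Images in the [0, 1] range, as produced by most tf.data pipelines.
    images = tf.random.uniform((2, 8, 8, 3))
    # Rescale to the [0, 255] range that color-based preprocessing assumes.
    rescaled = preprocessing.transform_value_range(
        images, original_range=(0, 1), target_range=(0, 255)
    )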
@@ -194,11 +205,17 @@ def get_rotation_matrix(angles, image_height, image_width, name=None): with backend.name_scope(name or "rotation_matrix"): x_offset = ( (image_width - 1) - - (tf.cos(angles) * (image_width - 1) - tf.sin(angles) * (image_height - 1)) + - ( + tf.cos(angles) * (image_width - 1) + - tf.sin(angles) * (image_height - 1) + ) ) / 2.0 y_offset = ( (image_height - 1) - - (tf.sin(angles) * (image_width - 1) + tf.cos(angles) * (image_height - 1)) + - ( + tf.sin(angles) * (image_width - 1) + + tf.cos(angles) * (image_height - 1) + ) ) / 2.0 num_angles = tf.shape(angles)[0] return tf.concat( @@ -319,7 +336,9 @@ def transform( if output_shape_value is not None: output_shape = output_shape_value - output_shape = tf.convert_to_tensor(output_shape, tf.int32, name="output_shape") + output_shape = tf.convert_to_tensor( + output_shape, tf.int32, name="output_shape" + ) if not output_shape.get_shape().is_compatible_with([2]): raise ValueError( @@ -328,7 +347,9 @@ def transform( "{}".format(output_shape) ) - fill_value = tf.convert_to_tensor(fill_value, tf.float32, name="fill_value") + fill_value = tf.convert_to_tensor( + fill_value, tf.float32, name="fill_value" + ) return tf.raw_ops.ImageProjectiveTransformV3( images=images, diff --git a/keras_cv/utils/resource_loader.py b/keras_cv/utils/resource_loader.py index 4abe3fa086..c0d6f2bda2 100644 --- a/keras_cv/utils/resource_loader.py +++ b/keras_cv/utils/resource_loader.py @@ -54,7 +54,9 @@ def __init__(self, relative_path): def ops(self): if self._ops is None: self.display_warning_if_incompatible() - self._ops = tf.load_op_library(get_path_to_datafile(self.relative_path)) + self._ops = tf.load_op_library( + get_path_to_datafile(self.relative_path) + ) return self._ops def display_warning_if_incompatible(self): diff --git a/keras_cv/utils/target_gather.py b/keras_cv/utils/target_gather.py index 772faeeae0..a028075af9 100644 --- a/keras_cv/utils/target_gather.py +++ b/keras_cv/utils/target_gather.py @@ -106,7 +106,9 @@ def _gather_batched(labels, match_indices, mask, mask_val): batch_indices = tf.expand_dims( tf.range(indices_shape[0], dtype=indices_dtype), axis=-1 ) * tf.ones([1, indices_shape[-1]], dtype=indices_dtype) - gather_nd_indices = tf.stack([batch_indices, match_indices], axis=-1) + gather_nd_indices = tf.stack( + [batch_indices, match_indices], axis=-1 + ) targets = tf.gather_nd(labels, gather_nd_indices) if mask is None: return targets diff --git a/keras_cv/utils/train.py b/keras_cv/utils/train.py index 0b9a3ad984..feb8df84a0 100644 --- a/keras_cv/utils/train.py +++ b/keras_cv/utils/train.py @@ -23,9 +23,13 @@ def scale_loss_for_distribution(loss_value): return loss_value -def convert_inputs_to_tf_dataset(x=None, y=None, sample_weight=None, batch_size=None): +def convert_inputs_to_tf_dataset( + x=None, y=None, sample_weight=None, batch_size=None +): if sample_weight is not None: - raise ValueError("Contrastive trainers do not yet support `sample_weight`.") + raise ValueError( + "Contrastive trainers do not yet support `sample_weight`." 
+ ) if isinstance(x, tf.data.Dataset): if y is not None or batch_size is not None: diff --git a/keras_cv/version_check_test.py b/keras_cv/version_check_test.py index 938359a1ab..e7a6e141ab 100644 --- a/keras_cv/version_check_test.py +++ b/keras_cv/version_check_test.py @@ -32,7 +32,8 @@ def test_check_tf_version_error(): tf.__version__ = "2.8.0" with pytest.raises( - RuntimeError, match="Tensorflow package version needs to be at least 2.11.0" + RuntimeError, + match="Tensorflow package version needs to be at least 2.11.0", ): version_check.check_tf_version() diff --git a/setup.cfg b/setup.cfg index 12759362c6..8a84d56891 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,7 @@ filterwarnings = ignore::PendingDeprecationWarning ignore::FutureWarning [flake8] -max-line-length = 88 +max-line-length = 80 max-doc-length = 200 per-file-ignores = ./keras_cv/__init__.py:E402, F401 @@ -41,4 +41,4 @@ profile = black force_single_line = True known_first_party = keras_cv,tests default_section = THIRDPARTY -line_length = 88 +line_length = 80 diff --git a/shell/format.sh b/shell/format.sh index aff6e55241..ede05d9615 100755 --- a/shell/format.sh +++ b/shell/format.sh @@ -1,3 +1,3 @@ #!/bin/bash -isort --sl --profile=black . -black . +isort --sl . +black --line-length 80 . diff --git a/shell/lint.sh b/shell/lint.sh index b51a83b484..7386fd62df 100755 --- a/shell/lint.sh +++ b/shell/lint.sh @@ -31,7 +31,7 @@ then exit 1 fi [ $# -eq 0 ] && echo "no issues with flake8" -black --check $files +black --line-length 80 --check $files if ! [ $? -eq 0 ] then echo "Please run \"sh shell/format.sh\" to format the code." diff --git a/shell/weights/remove_top.py b/shell/weights/remove_top.py index df2838e256..8de142b602 100644 --- a/shell/weights/remove_top.py +++ b/shell/weights/remove_top.py @@ -6,7 +6,9 @@ import keras_cv flags.DEFINE_string("weights_path", None, "Path of weights to load") -flags.DEFINE_string("output_weights_path", None, "Path of notop weights to store") +flags.DEFINE_string( + "output_weights_path", None, "Path of notop weights to store" +) flags.DEFINE_string("model_name", None, "Name of the KerasCV.model") FLAGS = flags.FLAGS diff --git a/shell/weights/update_training_history.py b/shell/weights/update_training_history.py index d8e10a3faf..66006e2691 100644 --- a/shell/weights/update_training_history.py +++ b/shell/weights/update_training_history.py @@ -8,7 +8,9 @@ flags.DEFINE_string( "model_name", None, "The name of the KerasCV.model that was trained" ) -flags.DEFINE_string("tensorboard_logs_path", None, "Path to tensorboard logs to load") +flags.DEFINE_string( + "tensorboard_logs_path", None, "Path to tensorboard logs to load" +) flags.DEFINE_string("training_script_path", None, "Path to the training script") flags.DEFINE_string( "script_version", @@ -21,7 +23,9 @@ "The version of the training script used to produce the latest weights. For example, v0", ) flags.DEFINE_string( - "contributor", None, "The GitHub username of the contributor of these results" + "contributor", + None, + "The GitHub username of the contributor of these results", ) flags.DEFINE_string( "accelerators", None, "The number of accelerators used for training." 
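Note: the update_training_history.py hunks above and below only re-wrap existing pandas filtering over the scalars DataFrame the script downloads from TensorBoard. A small self-contained sketch of that pattern, with invented data (the run/tag/step/value columns mirror the ones the diff touches):

    import pandas as pd

    # Stand-in for the DataFrame returned by get_scalars() in the script.
    tensorboard_results = pd.DataFrame(
        {
            "run": ["train", "train", "validation"],
            "tag": ["epoch_loss", "epoch_loss", "epoch_categorical_accuracy"],
            "step": [0, 1, 1],
            "value": [2.30, 1.95, 0.41],
        }
    )
    # Same pattern as the script: epochs trained is the largest step logged
    # under the "train" run.
    training_epochs = max(
        tensorboard_results[tensorboard_results.run == "train"].step
    )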
@@ -74,7 +78,9 @@ tensorboard_results = tensorboard_experiment.get_scalars() -training_epochs = max(tensorboard_results[tensorboard_results.run == "train"].step) +training_epochs = max( + tensorboard_results[tensorboard_results.run == "train"].step +) results_tags = tensorboard_results.tag.unique() @@ -90,7 +96,10 @@ (tensorboard_results.run == "validation") & ( (tensorboard_results.tag == "epoch_categorical_accuracy") - | (tensorboard_results.tag == "epoch_sparse_categorical_accuracy") + | ( + tensorboard_results.tag + == "epoch_sparse_categorical_accuracy" + ) ) ].value ) @@ -130,7 +139,10 @@ args_dict[key_value_pair[0]] = key_value_pair[1] new_results = { - "script": {"name": "/".join(training_script_dirs[2:]), "version": script_version}, + "script": { + "name": "/".join(training_script_dirs[2:]), + "version": script_version, + }, "epochs_trained": training_epochs, "tensorboard_logs": f"https://tensorboard.dev/experiment/{tensorboard_experiment_id}/", "contributor": contributor, From 4eb79516713f8172a6f8483a52d073bda8eeb2c8 Mon Sep 17 00:00:00 2001 From: IMVision <88665786+IMvision12@users.noreply.github.com> Date: Tue, 21 Feb 2023 23:29:46 +0530 Subject: [PATCH 24/27] RetinaNet Ragged image Issue (#1372) * fix_retina_net * Update retina_net.py * Update retina_net.py --- keras_cv/models/object_detection/retina_net/retina_net.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/keras_cv/models/object_detection/retina_net/retina_net.py b/keras_cv/models/object_detection/retina_net/retina_net.py index d753889a3b..c6563f9da4 100644 --- a/keras_cv/models/object_detection/retina_net/retina_net.py +++ b/keras_cv/models/object_detection/retina_net/retina_net.py @@ -226,6 +226,13 @@ def _forward(self, images, training=None): return box_pred, cls_pred def call(self, images, training=None): + if isinstance(images, tf.RaggedTensor): + raise ValueError( + "`RetinaNet()` does not yet support inputs of type `RaggedTensor` for input images. " + "To correctly resize your images for object detection tasks, we recommend resizing using " + "`keras_cv.layers.Resizing(pad_to_aspect_ratio=True, bounding_box_format=your_format)`" + "on your inputs." + ) box_pred, cls_pred = self._forward(images, training=training) if not training: # box_pred is on "center_yxhw" format, convert to target format. From 7dc04d04b1bf7428edb3e1933c042dca489bc60d Mon Sep 17 00:00:00 2001 From: IMVision <88665786+IMvision12@users.noreply.github.com> Date: Wed, 22 Feb 2023 04:34:36 +0530 Subject: [PATCH 25/27] Remove closures resnetv1 (#1412) * resnetv1 * fix * fix * fix * fix * fix * fix * fix * docs and tests * docs --- keras_cv/models/resnet_v1.py | 516 +++++++++++++++++------------- keras_cv/models/resnet_v1_test.py | 18 +- 2 files changed, 308 insertions(+), 226 deletions(-) diff --git a/keras_cv/models/resnet_v1.py b/keras_cv/models/resnet_v1.py index a1925e4051..3c3265ab26 100644 --- a/keras_cv/models/resnet_v1.py +++ b/keras_cv/models/resnet_v1.py @@ -20,6 +20,7 @@ import types import tensorflow as tf +from tensorflow import keras from tensorflow.keras import backend from tensorflow.keras import layers @@ -55,6 +56,7 @@ } BN_AXIS = 3 +BN_EPSILON = 1.001e-5 BASE_DOCSTRING = """Instantiates the {name} architecture. Reference: @@ -70,15 +72,18 @@ For transfer learning use cases, make sure to read the [guide to transfer learning & fine-tuning](https://keras.io/guides/transfer_learning/). 
+ Args: - include_rescaling: whether or not to Rescale the inputs.If set to True, - inputs will be passed through a `Rescaling(1/255.0)` layer. - include_top: whether to include the fully-connected layer at the top of the - network. If provided, classes must be provided. - classes: optional number of classes to classify images into, only to be - specified if `include_top` is True. - weights: one of `None` (random initialization), or a pretrained weight file - path. + include_rescaling: bool, whether or not to Rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + include_top: bool, whether to include the fully-connected layer at + the top of the network. If provided, `classes` must be provided. + classes: optional int, number of classes to classify images into (only + to be specified if `include_top` is `True`). + weights: one of `None` (random initialization), a pretrained weight file + path, or a reference to pre-trained weights (e.g. 'imagenet/classification') + (see available pre-trained weights in weights.py) input_shape: optional shape tuple, defaults to (None, None, 3). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. @@ -94,199 +99,207 @@ classifier_activation: A `str` or callable. The activation function to use on the "top" layer. Ignored unless `include_top=True`. Set `classifier_activation=None` to return the logits of the "top" layer. + Returns: A `keras.Model` instance. """ -def BasicBlock(filters, kernel_size=3, stride=1, conv_shortcut=True, name=None): - """A basic residual block. +def apply_basic_block( + x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None +): + """A basic residual block (v1). + Args: - x: input tensor. - filters: integer, filters of the basic layer. - kernel_size: default 3, kernel size of the basic layer. - stride: default 1, stride of the first layer. - conv_shortcut: default True, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. + x: input tensor. + filters: int, filters of the basic layer. + kernel_size: int, kernel size of the bottleneck layer. Defaults to 3. + stride: int, stride of the first layer. Defaults to 1. + conv_shortcut: bool, uses convolution shortcut if `True`. If `False` + (default), uses identity or pooling shortcut, based on stride. + Returns: Output tensor for the residual block. 
""" + if name is None: name = f"v1_basic_block_{backend.get_uid('v1_basic_block_')}" - def apply(x): - if conv_shortcut: - shortcut = layers.Conv2D( - filters, - 1, - strides=stride, - use_bias=False, - name=name + "_0_conv", - )(x) - shortcut = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_0_bn" - )(shortcut) - else: - shortcut = x - - x = layers.Conv2D( + if conv_shortcut: + shortcut = layers.Conv2D( filters, - kernel_size, - padding="SAME", + 1, strides=stride, use_bias=False, - name=name + "_1_conv", + name=name + "_0_conv", )(x) - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_1_bn" - )(x) - x = layers.Activation("relu", name=name + "_1_relu")(x) + shortcut = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_0_bn" + )(shortcut) + else: + shortcut = x - x = layers.Conv2D( - filters, - kernel_size, - padding="SAME", - use_bias=False, - name=name + "_2_conv", - )(x) - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_2_bn" - )(x) + x = layers.Conv2D( + filters, + kernel_size, + padding="SAME", + strides=stride, + use_bias=False, + name=name + "_1_conv", + )(x) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_1_bn" + )(x) + x = layers.Activation("relu", name=name + "_1_relu")(x) - x = layers.Add(name=name + "_add")([shortcut, x]) - x = layers.Activation("relu", name=name + "_out")(x) - return x + x = layers.Conv2D( + filters, + kernel_size, + padding="SAME", + use_bias=False, + name=name + "_2_conv", + )(x) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_2_bn" + )(x) + + x = layers.Add(name=name + "_add")([shortcut, x]) + x = layers.Activation("relu", name=name + "_out")(x) + return x - return apply +def apply_block( + x, filters, kernel_size=3, stride=1, conv_shortcut=True, name=None +): + """A residual block (v1). -def Block(filters, kernel_size=3, stride=1, conv_shortcut=True, name=None): - """A residual block. Args: - x: input tensor. - filters: integer, filters of the bottleneck layer. - kernel_size: default 3, kernel size of the bottleneck layer. - stride: default 1, stride of the first layer. - conv_shortcut: default True, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. + x: input tensor. + filters: int, filters of the basic layer. + kernel_size: int, kernel size of the bottleneck layer. Defaults to 3. + stride: int, stride of the first layer. Defaults to 1. + conv_shortcut: bool, uses convolution shortcut if `True`. If `False` + (default), uses identity or pooling shortcut, based on stride. + Returns: Output tensor for the residual block. 
""" + if name is None: name = f"v1_block_{backend.get_uid('v1_block')}" - def apply(x): - if conv_shortcut: - shortcut = layers.Conv2D( - 4 * filters, - 1, - strides=stride, - use_bias=False, - name=name + "_0_conv", - )(x) - shortcut = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_0_bn" - )(shortcut) - else: - shortcut = x - - x = layers.Conv2D( - filters, 1, strides=stride, use_bias=False, name=name + "_1_conv" - )(x) - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_1_bn" - )(x) - x = layers.Activation("relu", name=name + "_1_relu")(x) - - x = layers.Conv2D( - filters, - kernel_size, - padding="SAME", + if conv_shortcut: + shortcut = layers.Conv2D( + 4 * filters, + 1, + strides=stride, use_bias=False, - name=name + "_2_conv", + name=name + "_0_conv", )(x) - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_2_bn" - )(x) - x = layers.Activation("relu", name=name + "_2_relu")(x) + shortcut = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_0_bn" + )(shortcut) + else: + shortcut = x - x = layers.Conv2D( - 4 * filters, 1, use_bias=False, name=name + "_3_conv" - )(x) - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name=name + "_3_bn" - )(x) + x = layers.Conv2D( + filters, 1, strides=stride, use_bias=False, name=name + "_1_conv" + )(x) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_1_bn" + )(x) + x = layers.Activation("relu", name=name + "_1_relu")(x) + + x = layers.Conv2D( + filters, + kernel_size, + padding="SAME", + use_bias=False, + name=name + "_2_conv", + )(x) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_2_bn" + )(x) + x = layers.Activation("relu", name=name + "_2_relu")(x) - x = layers.Add(name=name + "_add")([shortcut, x]) - x = layers.Activation("relu", name=name + "_out")(x) - return x + x = layers.Conv2D(4 * filters, 1, use_bias=False, name=name + "_3_conv")(x) + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name=name + "_3_bn" + )(x) - return apply + x = layers.Add(name=name + "_add")([shortcut, x]) + x = layers.Activation("relu", name=name + "_out")(x) + return x -def Stack( - filters, blocks, stride=2, name=None, block_fn=Block, first_shortcut=True +def apply_stack( + x, + filters, + blocks, + stride=2, + name=None, + block_type="block", + first_shortcut=True, ): """A set of stacked residual blocks. + Args: - filters: integer, filters of the layers in a block. - blocks: integer, blocks in the stacked blocks. - stride1: default 2, stride of the first layer in the first block. - name: string, stack label. - block_fn: callable, `Block` or `BasicBlock`, the block function to stack. - first_shortcut: default True, use convolution shortcut if True, - otherwise identity shortcut. + x: input tensor. + filters: int, filters of the layer in a block. + blocks: int, blocks in the stacked blocks. + stride: int, stride of the first layer in the first block. Defaults to 2. + block_type: string, one of "basic_block" or "block". The block type to + stack. Use "basic_block" for ResNet18 and ResNet34. + first_shortcut: bool. Use convolution shortcut if `True` (default), + otherwise uses identity or pooling shortcut, based on stride. + Returns: Output tensor for the stacked blocks. 
""" + if name is None: name = f"v1_stack_{backend.get_uid('v1_stack')}" - def apply(x): - x = block_fn( - filters, - stride=stride, - name=name + "_block1", - conv_shortcut=first_shortcut, - )(x) - for i in range(2, blocks + 1): - x = block_fn( - filters, conv_shortcut=False, name=name + "_block" + str(i) - )(x) - return x + if block_type == "basic_block": + block_fn = apply_basic_block + elif block_type == "block": + block_fn = apply_block + else: + raise ValueError( + """`block_type` must be either "basic_block" or "block". """ + f"Received block_type={block_type}." + ) - return apply + x = block_fn( + x, + filters, + stride=stride, + name=name + "_block1", + conv_shortcut=first_shortcut, + ) + for i in range(2, blocks + 1): + x = block_fn( + x, filters, conv_shortcut=False, name=name + "_block" + str(i) + ) + return x -def ResNet( - stackwise_filters, - stackwise_blocks, - stackwise_strides, - include_rescaling, - include_top, - name="ResNet", - weights=None, - input_shape=(None, None, 3), - input_tensor=None, - pooling=None, - classes=None, - classifier_activation="softmax", - block_fn=Block, - **kwargs, -): +@keras.utils.register_keras_serializable(package="keras_cv.models") +class ResNet(keras.Model): """Instantiates the ResNet architecture. Args: - stackwise_filters: number of filters for each stack in the model. - stackwise_blocks: number of blocks for each stack in the model. - stackwise_strides: stride for each stack in the model. - include_rescaling: whether or not to Rescale the inputs. If set to True, - inputs will be passed through a `Rescaling(1/255.0)` layer. - name: string, model name. - include_top: whether to include the fully-connected + stackwise_filters: list of ints, number of filters for each stack in + the model. + stackwise_blocks: list of ints, number of blocks for each stack in the + model. + stackwise_strides: list of ints, stride for each stack in the model. + include_rescaling: bool, whether or not to Rescale the inputs. If set + to `True`, inputs will be passed through a `Rescaling(1/255.0)` + layer. + include_top: bool, whether to include the fully-connected layer at the top of the network. + name: string, model name. weights: one of `None` (random initialization), or the path to the weights file to be loaded. input_shape: optional shape tuple, defaults to (None, None, 3). @@ -305,89 +318,141 @@ def ResNet( be applied. classes: optional number of classes to classify images into, only to be specified if `include_top` is True. - classifier_activation: A `str` or callable. The activation function to use - on the "top" layer. Ignored unless `include_top=True`. Set - `classifier_activation=None` to return the logits of the "top" layer. - block_fn: callable, `Block` or `BasicBlock`, the block function to stack. - Use 'basic_block' for ResNet18 and ResNet34. - **kwargs: Pass-through keyword arguments to `tf.keras.Model`. + classifier_activation: A `str` or callable. The activation function to + use on the "top" layer. Ignored unless `include_top=True`. Set + `classifier_activation=None` to return the logits of the "top" + layer. + block_type: string, one of "basic_block" or "block". The block type to + stack. Use "basic_block" for ResNet18 and ResNet34. Returns: A `keras.Model` instance. """ - if weights and not tf.io.gfile.exists(weights): - raise ValueError( - "The `weights` argument should be either `None` or the path to the " - f"weights file to be loaded. 
Weights file not found at location: {weights}" - ) - - if include_top and not classes: - raise ValueError( - "If `include_top` is True, you should specify `classes`. " - f"Received: classes={classes}" - ) - - if include_top and pooling: - raise ValueError( - f"`pooling` must be `None` when `include_top=True`." - f"Received pooling={pooling} and include_top={include_top}. " - ) - inputs = utils.parse_model_inputs(input_shape, input_tensor) - x = inputs - - if include_rescaling: - x = layers.Rescaling(1 / 255.0)(x) - - x = layers.Conv2D( - 64, 7, strides=2, use_bias=False, padding="same", name="conv1_conv" - )(x) - - x = layers.BatchNormalization( - axis=BN_AXIS, epsilon=1.001e-5, name="conv1_bn" - )(x) - x = layers.Activation("relu", name="conv1_relu")(x) - - x = layers.MaxPooling2D(3, strides=2, padding="same", name="pool1_pool")(x) - - num_stacks = len(stackwise_filters) + def __init__( + self, + stackwise_filters, + stackwise_blocks, + stackwise_strides, + include_rescaling, + include_top, + name="ResNet", + weights=None, + input_shape=(None, None, 3), + input_tensor=None, + pooling=None, + classes=None, + classifier_activation="softmax", + block_type="block", + **kwargs, + ): + if weights and not tf.io.gfile.exists(weights): + raise ValueError( + "The `weights` argument should be either `None` or the path to the " + f"weights file to be loaded. Weights file not found at location: {weights}" + ) + + if include_top and not classes: + raise ValueError( + "If `include_top` is True, you should specify `classes`. " + f"Received: classes={classes}" + ) + + if include_top and pooling: + raise ValueError( + f"`pooling` must be `None` when `include_top=True`." + f"Received pooling={pooling} and include_top={include_top}. " + ) + + inputs = utils.parse_model_inputs(input_shape, input_tensor) + x = inputs + + if include_rescaling: + x = layers.Rescaling(1 / 255.0)(x) - stack_level_outputs = {} - for stack_index in range(num_stacks): - x = Stack( - filters=stackwise_filters[stack_index], - blocks=stackwise_blocks[stack_index], - stride=stackwise_strides[stack_index], - block_fn=block_fn, - first_shortcut=block_fn == Block or stack_index > 0, + x = layers.Conv2D( + 64, 7, strides=2, use_bias=False, padding="same", name="conv1_conv" )(x) - stack_level_outputs[stack_index + 2] = x - if include_top: - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - x = layers.Dense( - classes, activation=classifier_activation, name="predictions" + x = layers.BatchNormalization( + axis=BN_AXIS, epsilon=BN_EPSILON, name="conv1_bn" )(x) - else: - if pooling == "avg": - x = layers.GlobalAveragePooling2D(name="avg_pool")(x) - elif pooling == "max": - x = layers.GlobalMaxPooling2D(name="max_pool")(x) + x = layers.Activation("relu", name="conv1_relu")(x) - # Create model. - model = tf.keras.Model(inputs, x, name=name, **kwargs) - - if weights is not None: - model.load_weights(weights) - - # Set this private attribute for recreate backbone model with outputs at each of the - # resolution level. - model._backbone_level_outputs = stack_level_outputs - - # Bind the `to_backbone_model` method to the application model. 
- model.as_backbone = types.MethodType(utils.as_backbone, model) + x = layers.MaxPooling2D( + 3, strides=2, padding="same", name="pool1_pool" + )(x) - return model + num_stacks = len(stackwise_filters) + + stack_level_outputs = {} + for stack_index in range(num_stacks): + x = apply_stack( + x, + filters=stackwise_filters[stack_index], + blocks=stackwise_blocks[stack_index], + stride=stackwise_strides[stack_index], + block_type=block_type, + first_shortcut=(block_type == "block" or stack_index > 0), + ) + stack_level_outputs[stack_index + 2] = x + + if include_top: + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + x = layers.Dense( + classes, activation=classifier_activation, name="predictions" + )(x) + else: + if pooling == "avg": + x = layers.GlobalAveragePooling2D(name="avg_pool")(x) + elif pooling == "max": + x = layers.GlobalMaxPooling2D(name="max_pool")(x) + + # Create model. + super().__init__(inputs=inputs, outputs=x, **kwargs) + + # All references to `self` below this line + if weights is not None: + self.load_weights(weights) + # Set this private attribute for recreate backbone model with outputs at + # each resolution level. + self._backbone_level_outputs = stack_level_outputs + + # Bind the `to_backbone_model` method to the application model. + self.as_backbone = types.MethodType(utils.as_backbone, self) + + self.stackwise_filters = stackwise_filters + self.stackwise_blocks = stackwise_blocks + self.stackwise_strides = stackwise_strides + self.include_rescaling = include_rescaling + self.include_top = include_top + self.input_tensor = input_tensor + self.pooling = pooling + self.classes = classes + self.classifier_activation = classifier_activation + self.block_type = block_type + + def get_config(self): + return { + "stackwise_filters": self.stackwise_filters, + "stackwise_blocks": self.stackwise_blocks, + "stackwise_strides": self.stackwise_strides, + "include_rescaling": self.include_rescaling, + "include_top": self.include_top, + # Remove batch dimension from `input_shape` + "input_shape": self.input_shape[1:], + "input_tensor": self.input_tensor, + "pooling": self.pooling, + "classes": self.classes, + "classifier_activation": self.classifier_activation, + "block_type": self.block_type, + "name": self.name, + "trainable": self.trainable, + } + + @classmethod + def from_config(cls, config): + return cls(**config) def ResNet18( @@ -418,7 +483,7 @@ def ResNet18( pooling=pooling, classes=classes, classifier_activation=classifier_activation, - block_fn=BasicBlock, + block_type="basic_block", **kwargs, ) @@ -451,7 +516,7 @@ def ResNet34( pooling=pooling, classes=classes, classifier_activation=classifier_activation, - block_fn=BasicBlock, + block_type="basic_block", **kwargs, ) @@ -484,6 +549,7 @@ def ResNet50( pooling=pooling, classes=classes, classifier_activation=classifier_activation, + block_type="block", **kwargs, ) @@ -515,6 +581,7 @@ def ResNet101( pooling=pooling, classes=classes, classifier_activation=classifier_activation, + block_type="block", **kwargs, ) @@ -546,6 +613,7 @@ def ResNet152( pooling=pooling, classes=classes, classifier_activation=classifier_activation, + block_type="block", **kwargs, ) diff --git a/keras_cv/models/resnet_v1_test.py b/keras_cv/models/resnet_v1_test.py index 573c12db87..c38be23d35 100644 --- a/keras_cv/models/resnet_v1_test.py +++ b/keras_cv/models/resnet_v1_test.py @@ -14,6 +14,7 @@ import tensorflow as tf from absl.testing import parameterized +from packaging import version from keras_cv.models import resnet_v1 @@ -51,8 +52,21 @@ 
def test_application_variable_input_channels(self, app, last_dim, args): super()._test_application_variable_input_channels(app, last_dim, args) @parameterized.parameters(*MODEL_LIST) - def test_model_can_be_used_as_backbone(self, app, last_dim, args): - super()._test_model_can_be_used_as_backbone(app, last_dim, args) + def test_model_serialization_tf(self, app, last_dim, args): + super()._test_model_serialization( + app, last_dim, args, save_format="tf", filename="model" + ) + + @parameterized.parameters(*MODEL_LIST) + def test_model_serialization_keras_format(self, app, last_dim, args): + if version.parse(tf.__version__) >= version.parse("2.12.0-dev0"): + super()._test_model_serialization( + app, + last_dim, + args, + save_format="keras_v3", + filename="model.keras", + ) if __name__ == "__main__": From ec7dc0cb820ba96080426d9166ad13faf8b4e053 Mon Sep 17 00:00:00 2001 From: HongYu <20734616+james77777778@users.noreply.github.com> Date: Thu, 23 Feb 2023 03:14:08 +0800 Subject: [PATCH 26/27] Vectorize RandomZoom (#1435) * Vectorize RandomZoom * Add one test. - check RandomZoom process independently on batched images * Left unsupport functions unimplemented. - augment_segmentation_masks - augment_bounding_boxes --- benchmarks/vectorized_random_zoom.py | 355 ++++++++++++++++++ keras_cv/layers/preprocessing/random_zoom.py | 87 +++-- .../layers/preprocessing/random_zoom_test.py | 11 + 3 files changed, 413 insertions(+), 40 deletions(-) create mode 100644 benchmarks/vectorized_random_zoom.py diff --git a/benchmarks/vectorized_random_zoom.py b/benchmarks/vectorized_random_zoom.py new file mode 100644 index 0000000000..110a0d3480 --- /dev/null +++ b/benchmarks/vectorized_random_zoom.py @@ -0,0 +1,355 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time + +import matplotlib.pyplot as plt +import numpy as np +import tensorflow as tf +from keras import backend + +from keras_cv.layers import RandomZoom +from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( + BaseImageAugmentationLayer, +) +from keras_cv.utils import preprocessing as preprocessing_utils + +# In order to support both unbatched and batched inputs, the horizontal +# and verticle axis is reverse indexed +H_AXIS = -3 +W_AXIS = -2 + + +class OldRandomZoom(BaseImageAugmentationLayer): + """A preprocessing layer which randomly zooms images during training. + + This layer will randomly zoom in or out on each axis of an image + independently, filling empty space according to `fill_mode`. + + Input pixel values can be of any range (e.g. `[0., 1.)` or `[0, 255]`) and + of integer or floating point dtype. By default, the layer will output + floats. + + Args: + height_factor: a float represented as fraction of value, or a tuple of + size 2 representing lower and upper bound for zooming vertically. When + represented as a single float, this value is used for both the upper and + lower bound. A positive value means zooming out, while a negative value + means zooming in. 
For instance, `height_factor=(0.2, 0.3)` result in an + output zoomed out by a random amount in the range `[+20%, +30%]`. + `height_factor=(-0.3, -0.2)` result in an output zoomed in by a random + amount in the range `[-30%, -20%]`. + width_factor: a float represented as fraction of value, or a tuple of size + 2 representing lower and upper bound for zooming horizontally. When + represented as a single float, this value is used for both the upper and + lower bound. For instance, `width_factor=(0.2, 0.3)` result in an output + zooming out between 20% to 30%. `width_factor=(-0.3, -0.2)` result in an + output zooming in between 20% to 30%. Defaults to `None`, i.e., zooming + vertical and horizontal directions by preserving the aspect ratio. If + height_factor=0 and width_factor=None, it would result in images with + no zoom at all. + fill_mode: Points outside the boundaries of the input are filled according + to the given mode (one of `{"constant", "reflect", "wrap", "nearest"}`). + - *reflect*: `(d c b a | a b c d | d c b a)` The input is extended by + reflecting about the edge of the last pixel. + - *constant*: `(k k k k | a b c d | k k k k)` The input is extended by + filling all values beyond the edge with the same constant value k = 0. + - *wrap*: `(a b c d | a b c d | a b c d)` The input is extended by + wrapping around to the opposite edge. + - *nearest*: `(a a a a | a b c d | d d d d)` The input is extended by + the nearest pixel. + interpolation: Interpolation mode. Supported values: `"nearest"`, + `"bilinear"`. + seed: Integer. Used to create a random seed. + fill_value: a float represents the value to be filled outside the + boundaries when `fill_mode="constant"`. + + Example: + + >>> input_img = np.random.random((32, 224, 224, 3)) + >>> layer = keras_cv.layers.RandomZoom(.5, .2) + >>> out_img = layer(input_img) + >>> out_img.shape + TensorShape([32, 224, 224, 3]) + + Input shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. + + Output shape: + 3D (unbatched) or 4D (batched) tensor with shape: + `(..., height, width, channels)`, in `"channels_last"` format. 
+ """ + + def __init__( + self, + height_factor, + width_factor=None, + fill_mode="reflect", + interpolation="bilinear", + seed=None, + fill_value=0.0, + **kwargs, + ): + super().__init__(seed=seed, force_generator=True, **kwargs) + self.height_factor = height_factor + if isinstance(height_factor, (tuple, list)): + self.height_lower = height_factor[0] + self.height_upper = height_factor[1] + else: + self.height_lower = -height_factor + self.height_upper = height_factor + + if abs(self.height_lower) > 1.0 or abs(self.height_upper) > 1.0: + raise ValueError( + "`height_factor` must have values between [-1, 1], " + f"got {height_factor}" + ) + + self.width_factor = width_factor + if width_factor is not None: + if isinstance(width_factor, (tuple, list)): + self.width_lower = width_factor[0] + self.width_upper = width_factor[1] + else: + self.width_lower = -width_factor + self.width_upper = width_factor + + if self.width_lower < -1.0 or self.width_upper < -1.0: + raise ValueError( + "`width_factor` must have values larger than -1, " + f"got {width_factor}" + ) + + preprocessing_utils.check_fill_mode_and_interpolation( + fill_mode, interpolation + ) + + self.fill_mode = fill_mode + self.fill_value = fill_value + self.interpolation = interpolation + self.seed = seed + + def get_random_transformation(self, image=None, **kwargs): + height_zoom = self._random_generator.random_uniform( + shape=[1, 1], + minval=1.0 + self.height_lower, + maxval=1.0 + self.height_upper, + ) + if self.width_factor is not None: + width_zoom = self._random_generator.random_uniform( + shape=[1, 1], + minval=1.0 + self.width_lower, + maxval=1.0 + self.width_upper, + ) + else: + width_zoom = height_zoom + + return {"height_zoom": height_zoom, "width_zoom": width_zoom} + + def augment_image(self, image, transformation, **kwargs): + image = preprocessing_utils.ensure_tensor(image, self.compute_dtype) + original_shape = image.shape + image = tf.expand_dims(image, 0) + image_shape = tf.shape(image) + img_hd = tf.cast(image_shape[H_AXIS], tf.float32) + img_wd = tf.cast(image_shape[W_AXIS], tf.float32) + width_zoom = transformation["width_zoom"] + height_zoom = transformation["height_zoom"] + zooms = tf.cast( + tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32 + ) + output = preprocessing_utils.transform( + image, + self.get_zoom_matrix(zooms, img_hd, img_wd), + fill_mode=self.fill_mode, + fill_value=self.fill_value, + interpolation=self.interpolation, + ) + output = tf.squeeze(output, 0) + output.set_shape(original_shape) + return output + + def augment_label(self, label, transformation, **kwargs): + return label + + def get_config(self): + config = { + "height_factor": self.height_factor, + "width_factor": self.width_factor, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + "interpolation": self.interpolation, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + def get_zoom_matrix(self, zooms, image_height, image_width, name=None): + """Returns projective transform(s) for the given zoom(s). + + Args: + zooms: A matrix of 2-element lists representing `[zx, zy]` to zoom for + each image (for a batch of images). + image_height: Height of the image(s) to be transformed. + image_width: Width of the image(s) to be transformed. + name: The name of the op. + + Returns: + A tensor of shape `(num_images, 8)`. Projective transforms which can be + given to operation `image_projective_transform_v2`. 
+ If one row of transforms is + `[a0, a1, a2, b0, b1, b2, c0, c1]`, then it maps the *output* point + `(x, y)` to a transformed *input* point + `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`, + where `k = c0 x + c1 y + 1`. + """ + with backend.name_scope(name or "zoom_matrix"): + num_zooms = tf.shape(zooms)[0] + # The zoom matrix looks like: + # [[zx 0 0] + # [0 zy 0] + # [0 0 1]] + # where the last entry is implicit. + # Zoom matrices are always float32. + x_offset = ((image_width - 1.0) / 2.0) * (1.0 - zooms[:, 0, None]) + y_offset = ((image_height - 1.0) / 2.0) * (1.0 - zooms[:, 1, None]) + return tf.concat( + values=[ + zooms[:, 0, None], + tf.zeros((num_zooms, 1), tf.float32), + x_offset, + tf.zeros((num_zooms, 1), tf.float32), + zooms[:, 1, None], + y_offset, + tf.zeros((num_zooms, 2), tf.float32), + ], + axis=1, + ) + + +class RandomZoomTest(tf.test.TestCase): + def test_consistency_with_old_impl_in(self): + image_shape = (16, 32, 32, 3) + fixed_height_factor = (-0.5, -0.5) + fixed_width_factor = (-0.5, -0.5) + image = tf.random.uniform(shape=image_shape) * 255.0 + + layer = RandomZoom( + fixed_height_factor, fixed_width_factor, interpolation="nearest" + ) + old_layer = OldRandomZoom( + fixed_height_factor, fixed_width_factor, interpolation="nearest" + ) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output) + + def test_consistency_with_old_impl_out(self): + image_shape = (16, 32, 32, 3) + fixed_height_factor = (0.5, 0.5) + fixed_width_factor = (0.8, 0.8) + image = tf.random.uniform(shape=image_shape) * 255.0 + + layer = RandomZoom( + fixed_height_factor, fixed_width_factor, interpolation="nearest" + ) + old_layer = OldRandomZoom( + fixed_height_factor, fixed_width_factor, interpolation="nearest" + ) + + output = layer(image) + old_output = old_layer(image) + + self.assertAllClose(old_output, output) + + +if __name__ == "__main__": + # Run benchmark + (x_train, _), _ = tf.keras.datasets.cifar10.load_data() + x_train = x_train.astype(np.float32) + + num_images = [100, 200, 500, 1000] + results = {} + aug_candidates = [RandomZoom, OldRandomZoom] + aug_args = {"height_factor": 0.2, "width_factor": 0.3} + + for aug in aug_candidates: + # Eager Mode + c = aug.__name__ + layer = aug(**aug_args) + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + layer(x_train[:n_images]) + + t0 = time.time() + r1 = layer(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # Graph Mode + c = aug.__name__ + " Graph Mode" + layer = aug(**aug_args) + + @tf.function() + def apply_aug(inputs): + return layer(inputs) + + runtimes = [] + print(f"Timing {c}") + + for n_images in num_images: + # warmup + apply_aug(x_train[:n_images]) + + t0 = time.time() + r1 = apply_aug(x_train[:n_images]) + t1 = time.time() + runtimes.append(t1 - t0) + print(f"Runtime for {c}, n_images={n_images}: {t1-t0}") + results[c] = runtimes + + # XLA Mode + # cannot run tf.raw_ops.ImageProjectiveTransformV3 on XLA + # for more information please refer: + # https://github.com/tensorflow/tensorflow/issues/55194 + + plt.figure() + for key in results: + plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison.png") + + # So we can actually see more relevant margins + del results[aug_candidates[1].__name__] + plt.figure() + for key in results: + 
plt.plot(num_images, results[key], label=key) + plt.xlabel("Number images") + + plt.ylabel("Runtime (seconds)") + plt.legend() + plt.savefig("comparison_no_old_eager.png") + + # Run unit tests + tf.test.main() diff --git a/keras_cv/layers/preprocessing/random_zoom.py b/keras_cv/layers/preprocessing/random_zoom.py index 649310e79a..d10e502c81 100644 --- a/keras_cv/layers/preprocessing/random_zoom.py +++ b/keras_cv/layers/preprocessing/random_zoom.py @@ -1,4 +1,4 @@ -# Copyright 2022 The KerasCV Authors +# Copyright 2023 The KerasCV Authors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,10 +16,10 @@ import tensorflow as tf from keras import backend -from keras_cv.layers.preprocessing.base_image_augmentation_layer import ( - BaseImageAugmentationLayer, +from keras_cv.layers.preprocessing.vectorized_base_image_augmentation_layer import ( + VectorizedBaseImageAugmentationLayer, ) -from keras_cv.utils import preprocessing +from keras_cv.utils import preprocessing as preprocessing_utils # In order to support both unbatched and batched inputs, the horizontal # and verticle axis is reverse indexed @@ -28,7 +28,7 @@ @tf.keras.utils.register_keras_serializable(package="keras_cv") -class RandomZoom(BaseImageAugmentationLayer): +class RandomZoom(VectorizedBaseImageAugmentationLayer): """A preprocessing layer which randomly zooms images during training. This layer will randomly zoom in or out on each axis of an image @@ -129,7 +129,7 @@ def __init__( f"got {width_factor}" ) - preprocessing.check_fill_mode_and_interpolation( + preprocessing_utils.check_fill_mode_and_interpolation( fill_mode, interpolation ) @@ -138,60 +138,51 @@ def __init__( self.interpolation = interpolation self.seed = seed - def get_random_transformation(self, image=None, **kwargs): - height_zoom = self._random_generator.random_uniform( - shape=[1, 1], + def get_random_transformation_batch(self, batch_size, **kwargs): + height_zooms = self._random_generator.random_uniform( + shape=[batch_size, 1], minval=1.0 + self.height_lower, maxval=1.0 + self.height_upper, ) if self.width_factor is not None: - width_zoom = self._random_generator.random_uniform( - shape=[1, 1], + width_zooms = self._random_generator.random_uniform( + shape=[batch_size, 1], minval=1.0 + self.width_lower, maxval=1.0 + self.width_upper, ) else: - width_zoom = height_zoom + width_zooms = height_zooms - return {"height_zoom": height_zoom, "width_zoom": width_zoom} + return {"height_zooms": height_zooms, "width_zooms": width_zooms} - def augment_image(self, image, transformation, **kwargs): - image = preprocessing.ensure_tensor(image, self.compute_dtype) - original_shape = image.shape - image = tf.expand_dims(image, 0) - image_shape = tf.shape(image) + def augment_ragged_image(self, image, transformation, **kwargs): + return self.augment_images( + images=image, transformations=transformation, **kwargs + ) + + def augment_images(self, images, transformations, **kwargs): + images = preprocessing_utils.ensure_tensor(images, self.compute_dtype) + original_shape = images.shape + image_shape = tf.shape(images) img_hd = tf.cast(image_shape[H_AXIS], tf.float32) img_wd = tf.cast(image_shape[W_AXIS], tf.float32) - width_zoom = transformation["width_zoom"] - height_zoom = transformation["height_zoom"] + width_zooms = transformations["width_zooms"] + height_zooms = transformations["height_zooms"] zooms = tf.cast( - tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32 + 
tf.concat([width_zooms, height_zooms], axis=1), dtype=tf.float32 ) - output = preprocessing.transform( - image, + outputs = preprocessing_utils.transform( + images, self.get_zoom_matrix(zooms, img_hd, img_wd), fill_mode=self.fill_mode, fill_value=self.fill_value, interpolation=self.interpolation, ) - output = tf.squeeze(output, 0) - output.set_shape(original_shape) - return output + outputs.set_shape(original_shape) + return outputs - def augment_label(self, label, transformation, **kwargs): - return label - - def get_config(self): - config = { - "height_factor": self.height_factor, - "width_factor": self.width_factor, - "fill_mode": self.fill_mode, - "fill_value": self.fill_value, - "interpolation": self.interpolation, - "seed": self.seed, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) + def augment_labels(self, labels, transformations, **kwargs): + return labels def get_zoom_matrix(self, zooms, image_height, image_width, name=None): """Returns projective transform(s) for the given zoom(s). @@ -234,3 +225,19 @@ def get_zoom_matrix(self, zooms, image_height, image_width, name=None): ], axis=1, ) + + def get_config(self): + config = { + "height_factor": self.height_factor, + "width_factor": self.width_factor, + "fill_mode": self.fill_mode, + "fill_value": self.fill_value, + "interpolation": self.interpolation, + "seed": self.seed, + } + base_config = super().get_config() + return dict(list(base_config.items()) + list(config.items())) + + @classmethod + def from_config(cls, config): + return cls(**config) diff --git a/keras_cv/layers/preprocessing/random_zoom_test.py b/keras_cv/layers/preprocessing/random_zoom_test.py index f3f5b0cfd0..b967ef2922 100644 --- a/keras_cv/layers/preprocessing/random_zoom_test.py +++ b/keras_cv/layers/preprocessing/random_zoom_test.py @@ -116,6 +116,17 @@ def test_random_zoom_inference(self): actual_output = layer(input_images, training=False) self.assertAllClose(expected_output, actual_output) + def test_random_zoom_on_batched_images_independently(self): + image = tf.random.uniform(shape=(100, 100, 3)) + input_images = tf.stack([image, image], axis=0) + + layer = RandomZoom( + height_factor=(-0.4, -0.5), width_factor=(-0.2, -0.3) + ) + + results = layer(input_images) + self.assertNotAllClose(results[0], results[1]) + def test_config_with_custom_name(self): layer = RandomZoom(0.5, 0.6, name="image_preproc") config = layer.get_config() From 54fcdf2bb864888897fb5c6bb45b52ff27298bfe Mon Sep 17 00:00:00 2001 From: Luke Wood Date: Sat, 25 Feb 2023 14:04:19 -0800 Subject: [PATCH 27/27] [OD metrics]: (Step 1/3) Align _BoxRecall to the new object detection API (#1396) * Rewrite tests and update the docstring * Rewrite tests and update the docstring * filter out sentinels * filter out sentinels * filter out sentinels * math.top_k * Make recall work with dictionary input * recall unit tests fixed * Create COCO test, begin fixiing metric evaluation * Create COCO test, begin fixiing metric evaluation * Create COCO test, begin fixiing metric evaluation * Add prediction decoding * Remove changes to RetinaNet * Metrics and serialization changes * Move copy() call to to_dense * polish 1 * polish round 2 * polish round 3 * polish round 3 * polish round 3 * Rename recall * Rename recall * Reformat * Lint fix * Lint fix * Recall test implemented * Remove unused util * udpate docstring per francois' comment --- benchmarks/metrics/coco/recall_performance.py | 2 +- keras_cv/bounding_box/__init__.py | 1 + 
keras_cv/bounding_box/ensure_tensor.py | 26 ++ keras_cv/bounding_box/ensure_tensor_test.py | 39 +++ keras_cv/bounding_box/to_dense.py | 5 + keras_cv/bounding_box/validate_format.py | 10 +- .../base_image_augmentation_layer.py | 9 +- keras_cv/metrics/__init__.py | 2 +- .../mean_average_precision_test.py | 2 +- .../recall_correctness_test.py | 54 ++-- keras_cv/metrics/coco/recall.py | 122 ++++---- keras_cv/metrics/coco/recall_test.py | 293 ++++++++++-------- keras_cv/metrics/coco/utils.py | 124 +++++--- keras_cv/metrics/serialization_test.py | 4 +- 14 files changed, 426 insertions(+), 267 deletions(-) create mode 100644 keras_cv/bounding_box/ensure_tensor.py create mode 100644 keras_cv/bounding_box/ensure_tensor_test.py diff --git a/benchmarks/metrics/coco/recall_performance.py b/benchmarks/metrics/coco/recall_performance.py index d20aeb879d..dee9094ff9 100644 --- a/benchmarks/metrics/coco/recall_performance.py +++ b/benchmarks/metrics/coco/recall_performance.py @@ -53,7 +53,7 @@ def produce_random_data(include_confidence=False, num_images=128, classes=20): for images in n_images: y_true = produce_random_data(num_images=images) y_pred = produce_random_data(num_images=images, include_confidence=True) - metric = coco._COCORecall(class_ids) + metric = coco._BoxRecall(class_ids) # warm up metric.update_state(y_true, y_pred) metric.result() diff --git a/keras_cv/bounding_box/__init__.py b/keras_cv/bounding_box/__init__.py index ce7a03a93c..3cea65744e 100644 --- a/keras_cv/bounding_box/__init__.py +++ b/keras_cv/bounding_box/__init__.py @@ -15,6 +15,7 @@ from keras_cv.bounding_box.converters import _decode_deltas_to_boxes from keras_cv.bounding_box.converters import _encode_box_to_deltas from keras_cv.bounding_box.converters import convert_format +from keras_cv.bounding_box.ensure_tensor import ensure_tensor from keras_cv.bounding_box.formats import CENTER_XYWH from keras_cv.bounding_box.formats import REL_XYXY from keras_cv.bounding_box.formats import REL_YXYX diff --git a/keras_cv/bounding_box/ensure_tensor.py b/keras_cv/bounding_box/ensure_tensor.py new file mode 100644 index 0000000000..6c209a45b3 --- /dev/null +++ b/keras_cv/bounding_box/ensure_tensor.py @@ -0,0 +1,26 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from keras_cv.utils import preprocessing + + +def ensure_tensor(boxes, dtype=None): + boxes = boxes.copy() + for key in ["boxes", "classes", "confidence"]: + if key in boxes: + boxes[key] = preprocessing.ensure_tensor( + boxes[key], + dtype=dtype, + ) + return boxes diff --git a/keras_cv/bounding_box/ensure_tensor_test.py b/keras_cv/bounding_box/ensure_tensor_test.py new file mode 100644 index 0000000000..b0504d3185 --- /dev/null +++ b/keras_cv/bounding_box/ensure_tensor_test.py @@ -0,0 +1,39 @@ +# Copyright 2023 The KerasCV Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tensorflow as tf + +from keras_cv import bounding_box + + +class BoundingBoxEnsureTensorTest(tf.test.TestCase): + def test_convert_list(self): + boxes = {"boxes": [[0, 1, 2, 3]], "classes": [0]} + output = bounding_box.ensure_tensor(boxes) + self.assertFalse( + any([isinstance(boxes[k], tf.Tensor) for k in boxes.keys()]) + ) + self.assertTrue( + all([isinstance(output[k], tf.Tensor) for k in output.keys()]) + ) + + def test_confidence(self): + boxes = {"boxes": [[0, 1, 2, 3]], "classes": [0], "confidence": [0.245]} + output = bounding_box.ensure_tensor(boxes) + self.assertFalse( + any([isinstance(boxes[k], tf.Tensor) for k in boxes.keys()]) + ) + self.assertTrue( + all([isinstance(output[k], tf.Tensor) for k in output.keys()]) + ) diff --git a/keras_cv/bounding_box/to_dense.py b/keras_cv/bounding_box/to_dense.py index 01de501e6b..e243d922b6 100644 --- a/keras_cv/bounding_box/to_dense.py +++ b/keras_cv/bounding_box/to_dense.py @@ -48,8 +48,13 @@ def to_dense(bounding_boxes, max_boxes=None, default_value=-1): """ info = validate_format.validate_format(bounding_boxes) + # guards against errors in metrics regarding modification of inputs. + # also guards against unexpected behavior when modifying downstream + bounding_boxes = bounding_boxes.copy() + # Already running in masked mode if not info["ragged"]: + # even if already ragged, still copy the dictionary for API consistency return bounding_boxes if isinstance(bounding_boxes["classes"], tf.RaggedTensor): diff --git a/keras_cv/bounding_box/validate_format.py b/keras_cv/bounding_box/validate_format.py index b260dd554c..4bc8faa3d1 100644 --- a/keras_cv/bounding_box/validate_format.py +++ b/keras_cv/bounding_box/validate_format.py @@ -14,7 +14,7 @@ import tensorflow as tf -def validate_format(bounding_boxes): +def validate_format(bounding_boxes, variable_name="bounding_boxes"): """validates that a given set of bounding boxes complies with KerasCV format. For a set of bounding boxes to be valid it must satisfy the following conditions: @@ -37,14 +37,14 @@ def validate_format(bounding_boxes): """ if not isinstance(bounding_boxes, dict): raise ValueError( - "Expected `bounding_boxes` to be a dictionary, got " - f"`bounding_boxes={bounding_boxes}`." + f"Expected `{variable_name}` to be a dictionary, got " + f"`{variable_name}={bounding_boxes}`." ) if not all([x in bounding_boxes for x in ["boxes", "classes"]]): raise ValueError( - "Expected `bounding_boxes` to be a dictionary containing keys " + f"Expected `{variable_name}` to be a dictionary containing keys " "`'classes'` and `'boxes'`. Got " - f"`bounding_boxes.keys()={bounding_boxes.keys()}`." + f"`{variable_name}.keys()={bounding_boxes.keys()}`." 
) boxes = bounding_boxes.get("boxes") diff --git a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py index 574b48152e..1bb6741120 100644 --- a/keras_cv/layers/preprocessing/base_image_augmentation_layer.py +++ b/keras_cv/layers/preprocessing/base_image_augmentation_layer.py @@ -541,12 +541,7 @@ def _ensure_inputs_are_compute_dtype(self, inputs): self.compute_dtype, ) if BOUNDING_BOXES in inputs: - inputs[BOUNDING_BOXES]["boxes"] = preprocessing.ensure_tensor( - inputs[BOUNDING_BOXES]["boxes"], - self.compute_dtype, - ) - inputs[BOUNDING_BOXES]["classes"] = preprocessing.ensure_tensor( - inputs[BOUNDING_BOXES]["classes"], - self.compute_dtype, + inputs[BOUNDING_BOXES] = bounding_box.ensure_tensor( + inputs[BOUNDING_BOXES], dtype=self.compute_dtype ) return inputs diff --git a/keras_cv/metrics/__init__.py b/keras_cv/metrics/__init__.py index 13951e07bd..4bf77abb91 100644 --- a/keras_cv/metrics/__init__.py +++ b/keras_cv/metrics/__init__.py @@ -15,4 +15,4 @@ from keras_cv.metrics.coco.mean_average_precision import ( _COCOMeanAveragePrecision, ) -from keras_cv.metrics.coco.recall import _COCORecall +from keras_cv.metrics.coco.recall import _BoxRecall diff --git a/keras_cv/metrics/coco/numerical_tests/mean_average_precision_test.py b/keras_cv/metrics/coco/numerical_tests/mean_average_precision_test.py index a9770dcc8b..2998e7329d 100644 --- a/keras_cv/metrics/coco/numerical_tests/mean_average_precision_test.py +++ b/keras_cv/metrics/coco/numerical_tests/mean_average_precision_test.py @@ -17,7 +17,7 @@ import tensorflow as tf from keras_cv import bounding_box -from keras_cv.metrics.coco import _COCOMeanAveragePrecision +from keras_cv.metrics import _COCOMeanAveragePrecision SAMPLE_FILE = os.path.dirname(os.path.abspath(__file__)) + "/sample_boxes.npz" diff --git a/keras_cv/metrics/coco/numerical_tests/recall_correctness_test.py b/keras_cv/metrics/coco/numerical_tests/recall_correctness_test.py index 215cd781e4..76d5af3fff 100644 --- a/keras_cv/metrics/coco/numerical_tests/recall_correctness_test.py +++ b/keras_cv/metrics/coco/numerical_tests/recall_correctness_test.py @@ -12,19 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Tests to ensure that _COCORecall computes the correct values..""" +"""Tests to ensure that _BoxRecall computes the correct values.""" import os import numpy as np import tensorflow as tf from keras_cv import bounding_box -from keras_cv.metrics import _COCORecall +from keras_cv.metrics import _BoxRecall SAMPLE_FILE = os.path.dirname(os.path.abspath(__file__)) + "/sample_boxes.npz" -delta = 0.04 - class RecallCorrectnessTest(tf.test.TestCase): """Unit tests that test Keras COCO metric results against the known good ones of @@ -38,11 +36,11 @@ class RecallCorrectnessTest(tf.test.TestCase): Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.641 """ - def DISABLE_test_recall_correctness_maxdets_1(self): + def test_recall_correctness_maxdets_1(self): y_true, y_pred, categories = load_samples(SAMPLE_FILE) # Area range all - recall = _COCORecall( + recall = _BoxRecall( bounding_box_format="xyxy", class_ids=categories + [1000], max_detections=1, @@ -50,13 +48,13 @@ def DISABLE_test_recall_correctness_maxdets_1(self): recall.update_state(y_true, y_pred) result = recall.result().numpy() - self.assertAlmostEqual(result, 0.478, delta=delta) + self.assertAlmostEqual(result, 0.478, delta=0.01) - def DISABLE_test_recall_correctness_maxdets_10(self): + def test_recall_correctness_maxdets_10(self): y_true, y_pred, categories = load_samples(SAMPLE_FILE) # Area range all - recall = _COCORecall( + recall = _BoxRecall( bounding_box_format="xyxy", class_ids=categories + [1000], max_detections=10, @@ -64,13 +62,13 @@ def DISABLE_test_recall_correctness_maxdets_10(self): recall.update_state(y_true, y_pred) result = recall.result().numpy() - self.assertAlmostEqual(result, 0.645, delta=delta) + self.assertAlmostEqual(result, 0.645, delta=0.01) - def DISABLE_test_recall_correctness_maxdets_100(self): + def test_recall_correctness_maxdets_100(self): y_true, y_pred, categories = load_samples(SAMPLE_FILE) # Area range all - recall = _COCORecall( + recall = _BoxRecall( bounding_box_format="xyxy", class_ids=categories + [1000], max_detections=100, @@ -78,11 +76,11 @@ def DISABLE_test_recall_correctness_maxdets_100(self): recall.update_state(y_true, y_pred) result = recall.result().numpy() - self.assertAlmostEqual(result, 0.648, delta=delta) + self.assertAlmostEqual(result, 0.648, delta=0.01) - def DISABLE_test_recall_correctness_small_objects(self): + def test_recall_correctness_small_objects(self): y_true, y_pred, categories = load_samples(SAMPLE_FILE) - recall = _COCORecall( + recall = _BoxRecall( bounding_box_format="xyxy", class_ids=categories + [1000], max_detections=100, @@ -91,11 +89,11 @@ def DISABLE_test_recall_correctness_small_objects(self): recall.update_state(y_true, y_pred) result = recall.result().numpy() - self.assertAlmostEqual(result, 0.628, delta=delta) + self.assertAlmostEqual(result, 0.628, delta=0.03) - def DISABLE_test_recall_correctness_medium_objects(self): + def test_recall_correctness_medium_objects(self): y_true, y_pred, categories = load_samples(SAMPLE_FILE) - recall = _COCORecall( + recall = _BoxRecall( bounding_box_format="xyxy", class_ids=categories + [1000], max_detections=100, @@ -104,11 +102,11 @@ def DISABLE_test_recall_correctness_medium_objects(self): recall.update_state(y_true, y_pred) result = recall.result().numpy() - self.assertAlmostEqual(result, 0.653, delta=delta) + self.assertAlmostEqual(result, 0.653, delta=0.04) - def DISABLE_test_recall_correctness_large_objects(self): + def test_recall_correctness_large_objects(self): y_true, y_pred, categories = 
load_samples(SAMPLE_FILE)
-        recall = _COCORecall(
+        recall = _BoxRecall(
             bounding_box_format="xyxy",
             class_ids=categories + [1000],
             max_detections=100,
@@ -117,7 +115,7 @@ def DISABLE_test_recall_correctness_large_objects(self):
         recall.update_state(y_true, y_pred)
         result = recall.result().numpy()
 
-        self.assertAlmostEqual(result, 0.641, delta=delta)
+        self.assertAlmostEqual(result, 0.641, delta=0.026)
 
 
 def load_samples(fname):
@@ -125,10 +123,20 @@ def load_samples(fname):
     y_true = npzfile["arr_0"].astype(np.float32)
     y_pred = npzfile["arr_1"].astype(np.float32)
 
+    y_true = {
+        "boxes": y_true[:, :, :4],
+        "classes": y_true[:, :, 4],
+    }
+    y_pred = {
+        "boxes": y_pred[:, :, :4],
+        "classes": y_pred[:, :, 4],
+        "confidence": y_pred[:, :, 5],
+    }
+
     y_true = bounding_box.convert_format(y_true, source="xywh", target="xyxy")
     y_pred = bounding_box.convert_format(y_pred, source="xywh", target="xyxy")
 
-    categories = set(int(x) for x in y_true[:, :, 4].numpy().flatten())
+    categories = set(int(x) for x in y_true["classes"].flatten())
     categories = [x for x in categories if x != -1]
 
     return y_true, y_pred, categories
diff --git a/keras_cv/metrics/coco/recall.py b/keras_cv/metrics/coco/recall.py
index ed78225e82..c0126d297f 100644
--- a/keras_cv/metrics/coco/recall.py
+++ b/keras_cv/metrics/coco/recall.py
@@ -22,13 +22,19 @@
 from keras_cv.metrics.coco import utils
 
 
-class _COCORecall(keras.metrics.Metric):
-    """_COCORecall computes the COCO recall metric.
-
-    A usage guide is available on keras.io:
-    [Using KerasCV COCO metrics](https://keras.io/guides/keras_cv/coco_metrics/).
-    Full implementation details are available in the
-    [KerasCV COCO metrics whitepaper](https://arxiv.org/abs/2207.12120).
+class _BoxRecall(keras.metrics.Metric):
+    """_BoxRecall computes recall based on varying true positive IoU thresholds.
+
+    _BoxRecall is analogous to traditional Recall. The primary distinction is
+    that when operating in the problem domain of object detection there exists
+    ambiguity in what is considered a true positive. The _BoxRecall metric
+    works by using the Intersection over Union (IoU) metric to determine whether
+    or not a detection is a true positive or a false positive. For each
+    detection the IoU metric is computed for all ground truth boxes of the same
+    category. If the IoU is above the selected threshold `t`, then the box is
+    considered a true positive. If not, it is marked as a false positive. An
+    average is taken across many `t`, or IoU thresholds. These thresholds are
+    specified in the `iou_thresholds` argument.
 
     Args:
         class_ids: The class IDs to evaluate the metric for. To evaluate for
@@ -49,34 +55,22 @@
          than `32**2`, and smaller than `1000000**2`.
         max_detections: number of maximum detections a model is allowed to make.
             Must be an integer, defaults to `100`.
-    Usage:
+    Usage:
 
-    _COCORecall accepts two Tensors as input to it's `update_state` method.
-    These Tensors represent bounding boxes in `corners` format. Utilities
-    to convert Tensors from `xywh` to `corners` format can be found in
-    `keras_cv.utils.bounding_box`.
+
-    Each image in a dataset may have a different number of bounding boxes,
-    both in the ground truth dataset and the prediction set. In order to
-    account for this, you may either pass a `tf.RaggedTensor`, or pad Tensors
-    with `-1`s to indicate unused boxes. A utility function to perform this
-    padding is available at
-    `keras_cv.bounding_box.to_dense`.
+    _BoxRecall accepts two dictionaries that comply with KerasCV's bounding box
+    specification as inputs to its `update_state` method.
+    These dictionaries represent bounding boxes in the specified
+    `bounding_box_format`.
 
     ```python
-    coco_recall = keras_cv.metrics._COCORecall(
+    coco_recall = keras_cv.metrics._BoxRecall(
         bounding_box_format='xyxy',
         max_detections=100,
         class_ids=[1]
     )
-
-    y_true = np.array([[[0, 0, 10, 10, 1], [20, 20, 10, 10, 1]]]).astype(np.float32)
-    y_pred = np.array([[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]]).astype(
-        np.float32
-    )
-    coco_recall.update_state(y_true, y_pred)
-    coco_recall.result()
-    # 0.5
+    od_model.compile(metrics=[coco_recall])
+    od_model.fit(my_dataset)
     ```
     """
 
@@ -129,26 +123,33 @@ def reset_state(self):
 
     @tf.function
     def update_state(self, y_true, y_pred, sample_weight=None):
-        """
-        Args:
-            y_true: a bounding box Tensor in corners format.
-            y_pred: a bounding box Tensor in corners format.
-            sample_weight: Currently unsupported.
-        """
         if sample_weight is not None:
             warnings.warn(
                 "sample_weight is not yet supported in keras_cv COCO metrics."
             )
 
-        y_true = tf.cast(y_true, self.compute_dtype)
-        y_pred = tf.cast(y_pred, self.compute_dtype)
+        y_true = bounding_box.ensure_tensor(y_true, dtype=self.compute_dtype)
+        y_pred = bounding_box.ensure_tensor(y_pred, dtype=self.compute_dtype)
+        bounding_box.validate_format(
+            y_true,
+            variable_name="y_true",
+        )
+        bounding_box.validate_format(
+            y_pred,
+            variable_name="y_pred",
+        )
 
-        # TODO(lukewood): Add first party RaggedTensor support. Currently
-        # this could cause an OOM error if users are not expecting to convert
-        # these tensors to dense tensors.
-        if isinstance(y_true, tf.RaggedTensor):
-            y_true = y_true.to_tensor(default_value=-1)
-        if isinstance(y_pred, tf.RaggedTensor):
-            y_pred = y_pred.to_tensor(default_value=-1)
+        if y_true["boxes"].shape.rank != 3 or y_pred["boxes"].shape.rank != 3:
+            raise ValueError(
+                "Expected `y_true` and `y_pred` to be batched. "
+                "Received "
+                f"`y_true['boxes'].shape.rank={y_true['boxes'].shape.rank}` "
+                "and "
+                f"`y_pred['boxes'].shape.rank={y_pred['boxes'].shape.rank}`. "
+                "Expected both to be 3."
+ ) + + y_true = bounding_box.to_dense(y_true) + y_pred = bounding_box.to_dense(y_pred) y_true = bounding_box.convert_format( y_true, @@ -163,11 +164,7 @@ def update_state(self, y_true, y_pred, sample_weight=None): dtype=self.compute_dtype, ) - y_pred = utils.sort_bounding_boxes( - y_pred, axis=bounding_box.XYXY.CONFIDENCE - ) - - num_images = tf.shape(y_true)[0] + num_images = tf.shape(y_true["classes"])[0] iou_thresholds = tf.constant(self.iou_thresholds, dtype=tf.float32) class_ids = tf.constant(self.class_ids, dtype=tf.float32) @@ -179,8 +176,10 @@ def update_state(self, y_true, y_pred, sample_weight=None): ground_truth_boxes_update = tf.zeros_like(self.ground_truth_boxes) for img in tf.range(num_images): - y_true_for_image = utils.filter_out_sentinels(y_true[img]) - y_pred_for_image = utils.filter_out_sentinels(y_pred[img]) + y_true_for_image = utils.get_boxes_for_image(y_true, img) + + y_pred_for_image = utils.get_boxes_for_image(y_pred, img) + y_pred_for_image = utils.order_by_confidence(y_pred_for_image) if self.area_range is not None: y_true_for_image = utils.filter_boxes_by_area_range( @@ -193,23 +192,28 @@ def update_state(self, y_true, y_pred, sample_weight=None): for k_i in tf.range(num_categories): category = class_ids[k_i] - category_filtered_y_pred = utils.filter_boxes( + category_filtered_y_pred = utils.select_boxes_of_class( y_pred_for_image, - value=category, - axis=bounding_box.XYXY.CLASS, + class_id=category, ) detections = category_filtered_y_pred - if self.max_detections < tf.shape(category_filtered_y_pred)[0]: - detections = category_filtered_y_pred[: self.max_detections] + if ( + self.max_detections + < tf.shape(category_filtered_y_pred["classes"])[0] + ): + detections = utils.slice( + category_filtered_y_pred, self.max_detections + ) - ground_truths = utils.filter_boxes( + ground_truths = utils.select_boxes_of_class( y_true_for_image, - value=category, - axis=bounding_box.XYXY.CLASS, + class_id=category, ) - ious = iou_lib.compute_iou(ground_truths, detections, "yxyx") + ious = iou_lib.compute_iou( + ground_truths["boxes"], detections["boxes"], "yxyx" + ) for t_i in tf.range(num_thresholds): threshold = iou_thresholds[t_i] @@ -228,7 +232,7 @@ def update_state(self, y_true, y_pred, sample_weight=None): ground_truth_boxes_update = tf.tensor_scatter_nd_add( ground_truth_boxes_update, [[k_i]], - [tf.cast(tf.shape(ground_truths)[0], tf.int32)], + [tf.cast(tf.shape(ground_truths["classes"])[0], tf.int32)], ) self.true_positives.assign_add(true_positives_update) diff --git a/keras_cv/metrics/coco/recall_test.py b/keras_cv/metrics/coco/recall_test.py index c6ab5b842b..8a3adf9bc6 100644 --- a/keras_cv/metrics/coco/recall_test.py +++ b/keras_cv/metrics/coco/recall_test.py @@ -11,21 +11,18 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""Tests for _COCORecall.""" +"""Tests for _BoxRecall.""" import numpy as np import tensorflow as tf -from tensorflow import keras -from keras_cv.metrics import _COCORecall +from keras_cv import bounding_box +from keras_cv.metrics import _BoxRecall -class _COCORecallTest(tf.test.TestCase): - def DISABLE_test_runs_inside_model(self): - i = keras.layers.Input((None, None, 6)) - model = keras.Model(i, i) - - recall = _COCORecall( +class BoxRecallTest(tf.test.TestCase): + def test_ragged_tensor_support(self): + recall = _BoxRecall( max_detections=100, bounding_box_format="xyxy", class_ids=[1], @@ -33,60 +30,55 @@ def DISABLE_test_runs_inside_model(self): ) # These would match if they were in the area range - y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype( - np.float32 - ) - y_pred = np.array( - [[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]] - ).astype(np.float32) - - model.compile(metrics=[recall]) - model.evaluate(y_pred, y_true) - - self.assertAllEqual(recall.result(), 1.0) - - def DISABLE_test_ragged_tensor_support(self): - recall = _COCORecall( - max_detections=100, - bounding_box_format="xyxy", - class_ids=[1], - area_range=(0, 64**2), - ) + y_true = { + "boxes": tf.ragged.stack( + [ + tf.constant([[0, 0, 10, 10], [5, 5, 10, 10]], tf.float32), + tf.constant([[0, 0, 10, 10]], tf.float32), + ] + ), + "classes": tf.ragged.stack([tf.constant([1, 1]), tf.constant([1])]), + } - # These would match if they were in the area range - y_true = tf.ragged.stack( - [ - tf.constant([[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]], tf.float32), - tf.constant([[0, 0, 10, 10, 1]], tf.float32), - ] - ) - y_pred = tf.ragged.stack( - [ - tf.constant([[5, 5, 10, 10, 1, 0.9]], tf.float32), - tf.constant( - [[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]], tf.float32 - ), - ] - ) + y_pred = { + "boxes": tf.ragged.stack( + [ + tf.constant([[5, 5, 10, 10]], tf.float32), + tf.constant([[0, 0, 10, 10], [5, 5, 10, 10]], tf.float32), + ] + ), + "classes": tf.ragged.stack([tf.constant([1]), tf.constant([1, 1])]), + "confidence": tf.ragged.stack( + [tf.constant([1.0]), tf.constant([1.0, 0.9])] + ), + } recall.update_state(y_true, y_pred) self.assertAlmostEqual(recall.result(), 2 / 3) - def DISABLE_test_merge_state(self): - y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32) - y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32) - y_pred_match = tf.constant( - [[[0, 0, 100, 100, 1, 1.0]]], dtype=tf.float32 - ) - - m1 = _COCORecall( + def test_merge_state(self): + y_true = { + "boxes": [[[0, 0, 100, 100]]], + "classes": [[1]], + } + y_pred = { + "boxes": [[[0, 50, 100, 150]]], + "classes": [[1]], + "confidence": [[1.0]], + } + y_pred_match = { + "boxes": [[[0, 0, 100, 100]]], + "classes": [[1]], + "confidence": [[1.0]], + } + m1 = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.95], class_ids=[1], area_range=(0, 100000**2), max_detections=1, ) - m2 = _COCORecall( + m2 = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.95], class_ids=[1], @@ -99,7 +91,7 @@ def DISABLE_test_merge_state(self): m2.update_state(y_true, y_pred) - metric_result = _COCORecall( + metric_result = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.95], class_ids=[1], @@ -112,27 +104,29 @@ def DISABLE_test_merge_state(self): self.assertEqual([3], metric_result.ground_truth_boxes) self.assertEqual(1 / 3, metric_result.result()) - def DISABLE_test_recall_area_range_filtering(self): - recall = _COCORecall( + def test_recall_area_range_filtering(self): + recall = _BoxRecall( 
bounding_box_format="xyxy", max_detections=100, class_ids=[1], area_range=(32**2, 64**2), ) + y_true = { + "boxes": [[[0, 0, 10, 10], [5, 5, 10, 10]]], + "classes": [[1, 1]], + } + y_pred = { + "boxes": [[[0, 0, 10, 10], [5, 5, 10, 10]]], + "classes": [[1, 1]], + "confidence": [[1.0, 0.9]], + } - # These would match if they were in the area range - y_true = np.array([[[0, 0, 10, 10, 1], [5, 5, 10, 10, 1]]]).astype( - np.float32 - ) - y_pred = np.array( - [[[0, 0, 10, 10, 1, 1.0], [5, 5, 10, 10, 1, 0.9]]] - ).astype(np.float32) recall.update_state(y_true, y_pred) self.assertAllEqual(recall.result(), 0.0) - def DISABLE_test_missing_categories(self): - recall = _COCORecall( + def test_missing_categories(self): + recall = _BoxRecall( bounding_box_format="xyxy", max_detections=100, class_ids=[1, 2, 3], @@ -154,8 +148,8 @@ def DISABLE_test_missing_categories(self): self.assertEqual(recall.result(), 0.5) - def DISABLE_test_recall_direct_assignment(self): - recall = _COCORecall( + def test_recall_direct_assignment(self): + recall = _BoxRecall( bounding_box_format="xyxy", max_detections=100, class_ids=[1], @@ -171,57 +165,75 @@ def DISABLE_test_recall_direct_assignment(self): self.assertEqual(recall.result(), 0.5) - def DISABLE_test_max_detections_one_third(self): - recall = _COCORecall( + def test_max_detections_one_third(self): + recall = _BoxRecall( bounding_box_format="xyxy", max_detections=1, class_ids=[1], area_range=(0, 1e9**2), ) - y_true = np.array( - [ + y_true = { + "boxes": [ [ - [0, 0, 100, 100, 1], - [100, 100, 200, 200, 1], - [300, 300, 400, 400, 1], + [0, 0, 100, 100], + [100, 100, 200, 200], + [300, 300, 400, 400], ] - ] - ).astype(np.float32) - y_pred = np.concatenate([y_true, np.ones((1, 3, 1))], axis=-1).astype( - np.float32 - ) + ], + "classes": [[1, 1, 1]], + } + y_pred = { + "boxes": [ + [ + [0, 0, 100, 100], + [100, 100, 200, 200], + [300, 300, 400, 400], + ] + ], + "classes": [[1, 1, 1]], + "confidence": [[1, 1, 1]], + } # with max_dets=1, only 1 of the three boxes can be found recall.update_state(y_true, y_pred) self.assertAlmostEqual(recall.result().numpy(), 1 / 3) - def DISABLE_test_max_detections(self): - recall = _COCORecall( + def test_max_detections(self): + recall = _BoxRecall( bounding_box_format="xyxy", max_detections=3, class_ids=[1], area_range=(0, 1e9**2), ) - y_true = np.array( - [ + y_true = { + "boxes": [ [ - [0, 0, 100, 100, 1], - [100, 100, 200, 200, 1], - [300, 300, 400, 400, 1], + [0, 0, 100, 100], + [100, 100, 200, 200], + [300, 300, 400, 400], ] - ] - ).astype(np.float32) - y_pred = np.concatenate([y_true, np.ones((1, 3, 1))], axis=-1).astype( - np.float32 - ) + ], + "classes": [[1, 1, 1]], + } + y_pred = { + "boxes": [ + [ + [0, 0, 100, 100], + [100, 100, 200, 200], + [300, 300, 400, 400], + ] + ], + "classes": [[1, 1, 1]], + "confidence": [[1, 1, 1]], + } # with max_dets=1, only 1 of the three boxes can be found recall.update_state(y_true, y_pred) self.assertAlmostEqual(recall.result().numpy(), 1.0) - def DISABLE_test_recall_direct_assignment_one_third(self): - recall = _COCORecall( + def test_recall_direct_assignment_one_third(self): + recall = _BoxRecall( bounding_box_format="xyxy", max_detections=100, class_ids=[1], @@ -238,13 +250,18 @@ def DISABLE_test_recall_direct_assignment_one_third(self): self.assertAlmostEqual(recall.result().numpy(), 1 / 3) - def DISABLE_test_area_range_bounding_box_counting(self): - y_true = tf.constant( - [[[0, 0, 100, 100, 1], [0, 0, 100, 100, 1]]], dtype=tf.float32 - ) - y_pred = tf.constant([[[0, 50, 100, 150, 1, 
1.0]]], dtype=tf.float32) + def test_area_range_bounding_box_counting(self): + y_true = { + "boxes": [[[0.0, 0.0, 100.0, 100.0], [0.0, 0.0, 100.0, 100.0]]], + "classes": [[1.0, 1.0]], + } + y_pred = { + "boxes": [[[0.0, 50.0, 100.0, 150.0]]], + "classes": [[1.0]], + "confidence": [[1.0]], + } # note the low iou threshold - metric = _COCORecall( + metric = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.15], class_ids=[1], @@ -255,13 +272,18 @@ def DISABLE_test_area_range_bounding_box_counting(self): self.assertEqual([[2]], metric.ground_truth_boxes) self.assertEqual([[1]], metric.true_positives) - def DISABLE_test_true_positive_counting_one_good_one_bad(self): - y_true = tf.constant( - [[[0, 0, 100, 100, 1], [0, 0, 100, 100, 1]]], dtype=tf.float32 - ) - y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32) + def test_true_positive_counting_one_good_one_bad(self): + y_true = { + "boxes": [[[0.0, 0.0, 100.0, 100.0], [0.0, 0.0, 100.0, 100.0]]], + "classes": [[1.0, 1.0]], + } + y_pred = { + "boxes": [[[0.0, 50.0, 100.0, 150.0]]], + "classes": [[1.0]], + "confidence": [[1.0]], + } # note the low iou threshold - metric = _COCORecall( + metric = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.15], class_ids=[1], @@ -273,17 +295,15 @@ def DISABLE_test_true_positive_counting_one_good_one_bad(self): self.assertEqual([2], metric.ground_truth_boxes) self.assertEqual([[1]], metric.true_positives) - def DISABLE_test_true_positive_counting_one_true_two_pred(self): - y_true = tf.constant( - [[[0, 0, 100, 100, 1]]], - dtype=tf.float32, - ) - y_pred = tf.constant( - [[[0, 50, 100, 150, 1, 0.90], [0, 0, 100, 100, 1, 1.0]]], - dtype=tf.float32, - ) + def test_true_positive_counting_one_true_two_pred(self): + y_true = {"boxes": [[[0.0, 0.0, 100.0, 100.0]]], "classes": [[1.0]]} + y_pred = { + "boxes": [[[0.0, 50.0, 100.0, 150.0], [0.0, 0.0, 100.0, 100.0]]], + "classes": [[1.0, 1.0]], + "confidence": [[0.8999999761581421, 1.0]], + } # note the low iou threshold - metric = _COCORecall( + metric = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.15], class_ids=[1], @@ -293,17 +313,26 @@ def DISABLE_test_true_positive_counting_one_true_two_pred(self): metric.update_state(y_true, y_pred) self.assertEqual([[1]], metric.true_positives) - y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32) - y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32) + y_true = {"boxes": [[[0.0, 0.0, 100.0, 100.0]]], "classes": [[1.0]]} + y_pred = { + "boxes": [[[0.0, 50.0, 100.0, 150.0]]], + "classes": [[1.0]], + "confidence": [[1.0]], + } metric.update_state(y_true, y_pred) self.assertEqual([[2]], metric.true_positives) - def DISABLE_test_mixed_dtypes(self): - y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float64) - y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32) + def test_mixed_dtypes(self): + y_true = {"boxes": [[[0.0, 0.0, 100.0, 100.0]]], "classes": [[1.0]]} + y_true = bounding_box.ensure_tensor(y_true, dtype=tf.float64) + y_pred = { + "boxes": [[[0.0, 50.0, 100.0, 150.0]]], + "classes": [[1.0]], + "confidence": [[1.0]], + } - metric = _COCORecall( + metric = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.15], class_ids=[1], @@ -313,12 +342,16 @@ def DISABLE_test_mixed_dtypes(self): metric.update_state(y_true, y_pred) self.assertEqual(metric.result(), 1.0) - def DISABLE_test_matches_single_box(self): - y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32) - y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], 
dtype=tf.float32) + def test_matches_single_box(self): + y_true = {"boxes": [[[0.0, 0.0, 100.0, 100.0]]], "classes": [[1.0]]} + y_pred = { + "boxes": [[[0.0, 50.0, 100.0, 150.0]]], + "classes": [[1.0]], + "confidence": [[1.0]], + } # note the low iou threshold - metric = _COCORecall( + metric = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.15], class_ids=[1], @@ -329,11 +362,15 @@ def DISABLE_test_matches_single_box(self): self.assertEqual([[1]], metric.true_positives) - def DISABLE_test_matches_single_false_positive(self): - y_true = tf.constant([[[0, 0, 100, 100, 1]]], dtype=tf.float32) - y_pred = tf.constant([[[0, 50, 100, 150, 1, 1.0]]], dtype=tf.float32) + def test_matches_single_false_positive(self): + y_true = {"boxes": [[[0.0, 0.0, 100.0, 100.0]]], "classes": [[1.0]]} + y_pred = { + "boxes": [[[0.0, 50.0, 100.0, 150.0]]], + "classes": [[1.0]], + "confidence": [[1.0]], + } - metric = _COCORecall( + metric = _BoxRecall( bounding_box_format="xyxy", iou_thresholds=[0.95], class_ids=[1], diff --git a/keras_cv/metrics/coco/utils.py b/keras_cv/metrics/coco/utils.py index 0f657d7769..9b1d629f3e 100644 --- a/keras_cv/metrics/coco/utils.py +++ b/keras_cv/metrics/coco/utils.py @@ -17,16 +17,29 @@ from keras_cv import bounding_box -def filter_boxes_by_area_range(boxes, min_area, max_area): +def filter_boxes_by_area_range(bounding_boxes, min_area, max_area): + boxes, classes = bounding_boxes["boxes"], bounding_boxes["classes"] + confidence = bounding_boxes.get("confidence", None) + areas = bounding_box_area(boxes) inds = tf.where(tf.math.logical_and(areas >= min_area, areas < max_area)) - return tf.gather_nd(boxes, inds) + + boxes = tf.gather_nd(boxes, inds) + classes = tf.gather_nd(classes, inds) + result = {"boxes": boxes, "classes": classes} + + if confidence is not None: + confidence = tf.gather_nd(bounding_boxes["confidence"], inds) + result["confidence"] = confidence + + return result def bounding_box_area(boxes): """box_areas returns the area of the provided bounding boxes. + Args: - boxes: Tensor of bounding boxes of shape `[..., 4+]` in corners format. + boxes: Tensor of bounding boxes of shape `[..., 4]` in corners format. Returns: areas: Tensor of areas of shape `[...]`. """ @@ -35,18 +48,44 @@ def bounding_box_area(boxes): return tf.math.multiply(w, h) -def filter_boxes(boxes, value, axis=4): - """filter_boxes is used to select only boxes matching a given class. +def slice(bounding_boxes, idx): + boxes, classes = bounding_boxes["boxes"], bounding_boxes["classes"] + confidence = bounding_boxes.get("confidence", None) + + result = { + "boxes": boxes[:idx], + "classes": classes[:idx], + } + if confidence is not None: + result["confidence"] = confidence[:idx] + return result + + +def select_boxes_of_class(bounding_boxes, class_id): + """select_boxes_of_class is used to select only boxes matching a class. The most common use case for this is to filter to accept only a specific - bounding_box.CLASS. + 'class_id'. + Args: - boxes: Tensor of bounding boxes in format `[images, bounding_boxes, 6]` - value: Value the specified axis must match - axis: Integer identifying the axis on which to sort, default 4 + boxes: Tensor of bounding boxes in KerasCV format. 
+        class_id: the class id that selected boxes must match
     Returns:
         boxes: A new dictionary of bounding boxes whose classes all match `class_id`
     """
-    return tf.gather_nd(boxes, tf.where(boxes[:, axis] == value))
+
+    boxes, classes = bounding_boxes["boxes"], bounding_boxes["classes"]
+    confidence = bounding_boxes.get("confidence", None)
+    indices = tf.where(classes == tf.cast(class_id, classes.dtype))
+
+    result = {
+        "boxes": tf.gather_nd(boxes, indices),
+        "classes": tf.gather_nd(classes, indices),
+    }
+
+    if confidence is not None:
+        result["confidence"] = tf.gather_nd(confidence, indices)
+
+    return result
 
 
 def to_sentinel_padded_bounding_box_tensor(box_sets):
@@ -63,44 +102,49 @@
     return tf.ragged.stack(box_sets).to_tensor(default_value=-1)
 
 
-def filter_out_sentinels(boxes):
-    """filter_out_sentinels to filter out boxes that were padded on to the prediction
-    or ground truth bounding_box tensor to ensure dimensions match.
-    Args:
-        boxes: Tensor of bounding boxes in format `[bounding_boxes, 6]`, usually from a
-        single image.
-    Returns:
-        boxes: A new Tensor of bounding boxes, where boxes[axis]!=-1.
-    """
-    return tf.gather_nd(
-        boxes, tf.where(boxes[:, bounding_box.XYXY.CLASS] != -1)
-    )
+def get_boxes_for_image(bounding_boxes, index):
+    boxes = bounding_boxes["boxes"]
+    classes = bounding_boxes["classes"]
+    result = {
+        "boxes": boxes[index, ...],
+        "classes": classes[index, ...],
+    }
+    if "confidence" in bounding_boxes:
+        confidence = bounding_boxes["confidence"]
+        result["confidence"] = confidence[index, ...]
 
-def sort_bounding_boxes(boxes, axis=5):
-    """sort_bounding_boxes is used to sort a list of bounding boxes by a given axis.
+    return result
+
+
+def order_by_confidence(bounding_boxes):
+    """order_by_confidence is used to sort a batch of bounding boxes.
 
-    The most common use case for this is to sort by bounding_box.XYXY.CONFIDENCE, as
-    this is a part of computing both _COCORecall and _COCOMeanAveragePrecision.
     Args:
-        boxes: Tensor of bounding boxes in format `[images, bounding_boxes, 6]`
-        axis: Integer identifying the axis on which to sort, default 5
+        bounding_boxes: dictionary containing the bounding boxes.
     Returns:
        boxes: A new Tensor of Bounding boxes, sorted on an image-wise basis.
     """
-    num_images = tf.shape(boxes)[0]
-    boxes_sorted_list = tf.TensorArray(
-        tf.float32, size=num_images, dynamic_size=False
-    )
-    for img in tf.range(num_images):
-        preds_for_img = boxes[img, :, :]
-        prediction_scores = preds_for_img[:, axis]
-        _, idx = tf.math.top_k(prediction_scores, tf.shape(preds_for_img)[0])
-        boxes_sorted_list = boxes_sorted_list.write(
-            img, tf.gather(preds_for_img, idx, axis=0)
+    boxes = bounding_boxes["boxes"]
+    classes = bounding_boxes["classes"]
+    confidence = bounding_boxes["confidence"]
+
+    if boxes.shape.rank != 2:
+        raise ValueError(
+            "`order_by_confidence()` should only accept a single "
+            f"batch of bounding boxes. Received `boxes.shape={boxes.shape}`."
) + _, idx = tf.math.top_k(confidence, tf.shape(confidence)[0]) + + boxes = bounding_boxes["boxes"] + classes = bounding_boxes["classes"] + confidence = bounding_boxes["confidence"] + + boxes = tf.gather(boxes, idx, axis=0) + classes = tf.gather(classes, idx, axis=0) + confidence = tf.gather(confidence, idx, axis=0) - return boxes_sorted_list.stack() + return {"boxes": boxes, "classes": classes, "confidence": confidence} def match_boxes(ious, threshold): diff --git a/keras_cv/metrics/serialization_test.py b/keras_cv/metrics/serialization_test.py index 4e202ee0b5..15b6ce424a 100644 --- a/keras_cv/metrics/serialization_test.py +++ b/keras_cv/metrics/serialization_test.py @@ -20,8 +20,8 @@ class SerializationTest(tf.test.TestCase, parameterized.TestCase): @parameterized.named_parameters( ( - "_COCORecall", - metrics._COCORecall, + "_BoxRecall", + metrics._BoxRecall, {"class_ids": [0, 1, 2], "bounding_box_format": "xyxy"}, ), (