From a83d9f9b8c1fd4068f484997f5113aaa28772d00 Mon Sep 17 00:00:00 2001
From: Sravana Neeli
Date: Wed, 8 Jan 2025 15:25:08 -0800
Subject: [PATCH 01/15] remove bounding box utils and refactor retinanet

---
 keras_hub/api/layers/__init__.py              |  6 +-
 .../modeling}/anchor_generator.py             | 29 ++-------
 .../modeling}/anchor_generator_test.py        |  2 +-
 .../modeling}/box_matcher.py                  |  3 +
 .../modeling}/box_matcher_test.py             |  2 +-
 .../modeling}/non_max_supression.py           | 60 ++++++++++---------
 .../modeling}/non_max_supression_test.py      |  6 +-
 .../image_object_detector_preprocessor.py     |  2 +-
 .../retinanet/retinanet_image_converter.py    | 39 ++++++++----
 .../retinanet/retinanet_label_encoder.py      | 27 ++++-----
 .../retinanet/retinanet_label_encoder_test.py |  2 +-
 .../retinanet/retinanet_object_detector.py    | 44 +++++++-------
 .../retinanet_object_detector_test.py         | 10 ++--
 13 files changed, 122 insertions(+), 110 deletions(-)
 rename keras_hub/src/{models/retinanet => layers/modeling}/anchor_generator.py (95%)
 rename keras_hub/src/{models/retinanet => layers/modeling}/anchor_generator_test.py (96%)
 rename keras_hub/src/{models/retinanet => layers/modeling}/box_matcher.py (99%)
 rename keras_hub/src/{models/retinanet => layers/modeling}/box_matcher_test.py (98%)
 rename keras_hub/src/{models/retinanet => layers/modeling}/non_max_supression.py (93%)
 rename keras_hub/src/{models/retinanet => layers/modeling}/non_max_supression_test.py (88%)

diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py
index 0e0d31d3a7..c43a4d8b79 100644
--- a/keras_hub/api/layers/__init__.py
+++ b/keras_hub/api/layers/__init__.py
@@ -5,11 +5,16 @@
 """
 
 from keras_hub.src.layers.modeling.alibi_bias import AlibiBias
+from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator
+from keras_hub.src.layers.modeling.box_matcher import BoxMatcher
 from keras_hub.src.layers.modeling.cached_multi_head_attention import (
     CachedMultiHeadAttention,
 )
 from keras_hub.src.layers.modeling.f_net_encoder import FNetEncoder
 from keras_hub.src.layers.modeling.masked_lm_head import MaskedLMHead
+from keras_hub.src.layers.modeling.non_max_supression import (
+    NonMaxSuppression,
+)
 from keras_hub.src.layers.modeling.position_embedding import PositionEmbedding
 from keras_hub.src.layers.modeling.reversible_embedding import (
     ReversibleEmbedding,
@@ -55,7 +60,6 @@
 from keras_hub.src.models.resnet.resnet_image_converter import (
     ResNetImageConverter,
 )
-from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator
 from keras_hub.src.models.retinanet.retinanet_image_converter import (
     RetinaNetImageConverter,
 )
diff --git a/keras_hub/src/models/retinanet/anchor_generator.py b/keras_hub/src/layers/modeling/anchor_generator.py
similarity index 95%
rename from keras_hub/src/models/retinanet/anchor_generator.py
rename to keras_hub/src/layers/modeling/anchor_generator.py
index a3c3800c49..5aefaaeaf7 100644
--- a/keras_hub/src/models/retinanet/anchor_generator.py
+++ b/keras_hub/src/layers/modeling/anchor_generator.py
@@ -5,19 +5,14 @@
 
 from keras_hub.src.api_export import keras_hub_export
 
-# TODO: https://github.com/keras-team/keras-hub/issues/1965
-from keras_hub.src.bounding_box.converters import convert_format
-
 
 @keras_hub_export("keras_hub.layers.AnchorGenerator")
 class AnchorGenerator(keras.layers.Layer):
     """Generates anchor boxes for object detection tasks.
- This layer creates a set of anchor boxes (also known as default boxes or priors) for use in object detection models, particularly those utilizing Feature Pyramid Networks (FPN). It generates anchors across multiple pyramid levels, with various scales and aspect ratios. - Feature Pyramid Levels: - Levels typically range from 2 to 6 (P2 to P7), corresponding to different resolutions of the input image. @@ -26,7 +21,6 @@ class AnchorGenerator(keras.layers.Layer): detecting smaller objects. - Higher levels (e.g., P7) have lower resolution and are used for larger objects. - Args: bounding_box_format: str. The format of the bounding boxes to be generated. Expected to be a string like 'xyxy', 'xywh', etc. @@ -39,12 +33,10 @@ class AnchorGenerator(keras.layers.Layer): each level. Each number indicates the ratio of width to height. anchor_size: float. Scale of size of the base anchor relative to the feature stride 2^level. - Call arguments: inputs: An image tensor with shape `[B, H, W, C]` or `[H, W, C]`. Its shape will be used to determine anchor sizes. - Returns: Dict: A dictionary mapping feature levels (e.g., 'P3', 'P4', etc.) to anchor boxes. Each entry contains a @@ -53,7 +45,6 @@ class AnchorGenerator(keras.layers.Layer): where H and W are the height and width of the image, stride is 2^level, and num_anchors_per_location is `num_scales * len(aspect_ratios)`. - Example: ```python anchor_generator = AnchorGenerator( @@ -94,29 +85,23 @@ def call(self, inputs): image_shape = images_shape[1:-1] else: image_shape = images_shape[:-1] - image_shape = tuple(image_shape) - multilevel_anchors = {} for level in range(self.min_level, self.max_level + 1): # Calculate the feature map size for this level feat_size_y = math.ceil(image_shape[0] / 2**level) feat_size_x = math.ceil(image_shape[1] / 2**level) - # Calculate the stride (step size) for this level stride_y = image_shape[0] // feat_size_y stride_x = image_shape[1] // feat_size_x - # Generate anchor center points # Start from stride/2 to center anchors on pixels cx = ops.arange(0, feat_size_x, dtype="float32") * stride_x cy = ops.arange(0, feat_size_y, dtype="float32") * stride_y - # Create a grid of anchor centers cy_grid, cx_grid = ops.meshgrid(cy, cx, indexing="ij") cy_grid = ops.reshape(cy_grid, (-1,)) cx_grid = ops.reshape(cx_grid, (-1,)) - shifts = ops.stack((cx_grid, cy_grid, cx_grid, cy_grid), axis=1) sizes = [ int( @@ -124,7 +109,6 @@ def call(self, inputs): ) for scale in range(self.num_scales) ] - base_anchors = self.generate_base_anchors( sizes=sizes, aspect_ratios=self.aspect_ratios ) @@ -133,10 +117,12 @@ def call(self, inputs): anchors = shifts + base_anchors anchors = ops.reshape(anchors, (-1, 4)) - multilevel_anchors[f"P{level}"] = convert_format( - anchors, - source="xyxy", - target=self.bounding_box_format, + multilevel_anchors[f"P{level}"] = ( + keras.utils.bounding_boxes.convert_format( + anchors, + source="xyxy", + target=self.bounding_box_format, + ) ) return multilevel_anchors @@ -145,10 +131,8 @@ def generate_base_anchors(self, sizes, aspect_ratios): aspect_ratios = ops.convert_to_tensor(aspect_ratios) h_ratios = ops.sqrt(aspect_ratios) w_ratios = 1 / h_ratios - ws = ops.reshape(w_ratios[:, None] * sizes[None, :], (-1,)) hs = ops.reshape(h_ratios[:, None] * sizes[None, :], (-1,)) - base_anchors = ops.stack([-1 * ws, -1 * hs, ws, hs], axis=1) / 2 base_anchors = ops.round(base_anchors) return base_anchors @@ -159,7 +143,6 @@ def compute_output_shape(self, input_shape): image_height, image_width = input_shape[1:-1] else: 
            image_height, image_width = input_shape[:-1]
-
         for i in range(self.min_level, self.max_level + 1):
             multilevel_boxes_shape[f"P{i}"] = (
                 int(
diff --git a/keras_hub/src/models/retinanet/anchor_generator_test.py b/keras_hub/src/layers/modeling/anchor_generator_test.py
similarity index 96%
rename from keras_hub/src/models/retinanet/anchor_generator_test.py
rename to keras_hub/src/layers/modeling/anchor_generator_test.py
index 0b71630843..f3bc2510de 100644
--- a/keras_hub/src/models/retinanet/anchor_generator_test.py
+++ b/keras_hub/src/layers/modeling/anchor_generator_test.py
@@ -2,7 +2,7 @@
 from absl.testing import parameterized
 from keras import ops
 
-from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator
+from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator
 from keras_hub.src.tests.test_case import TestCase
 
 
diff --git a/keras_hub/src/models/retinanet/box_matcher.py b/keras_hub/src/layers/modeling/box_matcher.py
similarity index 99%
rename from keras_hub/src/models/retinanet/box_matcher.py
rename to keras_hub/src/layers/modeling/box_matcher.py
index dd8a486814..b841e8deb5 100644
--- a/keras_hub/src/models/retinanet/box_matcher.py
+++ b/keras_hub/src/layers/modeling/box_matcher.py
@@ -1,7 +1,10 @@
 import keras
 from keras import ops
 
+from keras_hub.src.api_export import keras_hub_export
+
 
+@keras_hub_export("keras_hub.layers.BoxMatcher")
 class BoxMatcher(keras.layers.Layer):
     """Box matching logic based on argmax of highest value (e.g., IOU).
 
diff --git a/keras_hub/src/models/retinanet/box_matcher_test.py b/keras_hub/src/layers/modeling/box_matcher_test.py
similarity index 98%
rename from keras_hub/src/models/retinanet/box_matcher_test.py
rename to keras_hub/src/layers/modeling/box_matcher_test.py
index d991f90e5b..5fdf39a7ac 100644
--- a/keras_hub/src/models/retinanet/box_matcher_test.py
+++ b/keras_hub/src/layers/modeling/box_matcher_test.py
@@ -1,7 +1,7 @@
 import numpy as np
 from keras import ops
 
-from keras_hub.src.models.retinanet.box_matcher import BoxMatcher
+from keras_hub.src.layers.modeling.box_matcher import BoxMatcher
 from keras_hub.src.tests.test_case import TestCase
 
 
diff --git a/keras_hub/src/models/retinanet/non_max_supression.py b/keras_hub/src/layers/modeling/non_max_supression.py
similarity index 93%
rename from keras_hub/src/models/retinanet/non_max_supression.py
rename to keras_hub/src/layers/modeling/non_max_supression.py
index 5ca52b4dfc..207891ac9e 100644
--- a/keras_hub/src/models/retinanet/non_max_supression.py
+++ b/keras_hub/src/layers/modeling/non_max_supression.py
@@ -2,22 +2,22 @@
 
 import keras
 from keras import ops
+from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes import (
+    validation,
+)
 
-# TODO: https://github.com/keras-team/keras-hub/issues/1965
-from keras_hub.src.bounding_box import converters
-from keras_hub.src.bounding_box import utils
-from keras_hub.src.bounding_box import validate_format
+from keras_hub.src.api_export import keras_hub_export
 
 EPSILON = 1e-8
 
 
+@keras_hub_export("keras_hub.layers.NonMaxSuppression")
 class NonMaxSuppression(keras.layers.Layer):
     """A Keras layer that decodes predictions of an object detection model.
 
     Args:
         bounding_box_format: The format of bounding boxes of input dataset.
-            Refer
-            TODO: link keras core bounding box docs
+            Refer to the Keras bounding box documentation
             for more details on supported bounding box formats.
         from_logits: boolean, True means input score is logits, False means
            confidence.
@@ -49,7 +49,10 @@ def __init__(
         self.built = True
 
     def call(
-        self, box_prediction, class_prediction, images=None, image_shape=None
+        self,
+        box_prediction,
+        class_prediction,
+        images=None,
     ):
         """Accepts images and raw scores, returning bounding box predictions.
 
@@ -59,15 +62,24 @@ def call(
             class_prediction: Dense Tensor of shape [batch, boxes, num_classes].
         """
         target_format = "yxyx"
-        if utils.is_relative(self.bounding_box_format):
-            target_format = utils.as_relative(target_format)
+        height, width = None, None
+
+        if "rel" in self.bounding_box_format and images is None:
+            raise ValueError(
+                "`images` cannot be None when using relative "
+                "bounding box format."
+            )
+
+        if "rel" in self.bounding_box_format:
+            target_format = "rel_" + target_format
+            height, width = ops.shape(images)[-3], ops.shape(images)[-2]
 
-        box_prediction = converters.convert_format(
+        box_prediction = keras.utils.bounding_boxes.convert_format(
             box_prediction,
             source=self.bounding_box_format,
             target=target_format,
-            images=images,
-            image_shape=image_shape,
+            height=height,
+            width=width,
         )
         if self.from_logits:
             class_prediction = ops.sigmoid(class_prediction)
@@ -95,17 +107,17 @@
             class_prediction, ops.expand_dims(idx, axis=-1), axis=1
         )
 
-        box_prediction = converters.convert_format(
+        box_prediction = keras.utils.bounding_boxes.convert_format(
             box_prediction,
             source=target_format,
             target=self.bounding_box_format,
-            images=images,
-            image_shape=image_shape,
+            height=height,
+            width=width,
         )
         bounding_boxes = {
             "boxes": box_prediction,
             "confidence": confidence_prediction,
-            "classes": ops.argmax(class_prediction, axis=-1),
+            "labels": ops.argmax(class_prediction, axis=-1),
             "num_detections": valid_det,
         }
 
@@ -519,14 +531,8 @@ def mask_invalid_detections(bounding_boxes):
         returned value will also return `tf.RaggedTensor` representations.
     """
     # ensure we are complying with Keras bounding box format.
-    info = validate_format.validate_format(bounding_boxes)
-    if info["ragged"]:
-        raise ValueError(
-            "`bounding_box.mask_invalid_detections()` requires inputs to be "
-            "Dense tensors. Please call "
-            "`bounding_box.to_dense(bounding_boxes)` before passing your boxes "
-            "to `bounding_box.mask_invalid_detections()`."
- ) + validation.validate_bounding_boxes(bounding_boxes) + if "num_detections" not in bounding_boxes: raise ValueError( "`bounding_boxes` must have key 'num_detections' " @@ -534,7 +540,7 @@ def mask_invalid_detections(bounding_boxes): ) boxes = bounding_boxes.get("boxes") - classes = bounding_boxes.get("classes") + labels = bounding_boxes.get("labels") confidence = bounding_boxes.get("confidence", None) num_detections = bounding_boxes.get("num_detections") @@ -545,7 +551,7 @@ def mask_invalid_detections(bounding_boxes): ) mask = mask < num_detections[:, None] - classes = ops.where(mask, classes, -ops.ones_like(classes)) + labels = ops.where(mask, labels, -ops.ones_like(labels)) if confidence is not None: confidence = ops.where(mask, confidence, -ops.ones_like(confidence)) @@ -558,7 +564,7 @@ def mask_invalid_detections(bounding_boxes): result = bounding_boxes.copy() result["boxes"] = boxes - result["classes"] = classes + result["labels"] = labels if confidence is not None: result["confidence"] = confidence diff --git a/keras_hub/src/models/retinanet/non_max_supression_test.py b/keras_hub/src/layers/modeling/non_max_supression_test.py similarity index 88% rename from keras_hub/src/models/retinanet/non_max_supression_test.py rename to keras_hub/src/layers/modeling/non_max_supression_test.py index 94d3c3f124..4f310a0934 100644 --- a/keras_hub/src/models/retinanet/non_max_supression_test.py +++ b/keras_hub/src/layers/modeling/non_max_supression_test.py @@ -1,7 +1,7 @@ import numpy as np from keras import ops -from keras_hub.src.models.retinanet.non_max_supression import NonMaxSuppression +from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression from keras_hub.src.tests.test_case import TestCase @@ -29,7 +29,7 @@ def test_confidence_threshold(self): self.assertAllClose( outputs["boxes"], [boxes[0][-2:, ...], boxes[1][:2, ...]] ) - self.assertAllClose(outputs["classes"], [[0.0, 0.0], [0.0, 0.0]]) + self.assertAllClose(outputs["labels"], [[0.0, 0.0], [0.0, 0.0]]) self.assertAllClose(outputs["confidence"], [[0.9, 0.5], [0.7, 0.5]]) def test_max_detections(self): @@ -55,5 +55,5 @@ def test_max_detections(self): self.assertAllClose( outputs["boxes"], [boxes[0][-1:, ...], boxes[1][:1, ...]] ) - self.assertAllClose(outputs["classes"], [[0.0], [0.0]]) + self.assertAllClose(outputs["labels"], [[0.0], [0.0]]) self.assertAllClose(outputs["confidence"], [[0.9], [0.7]]) diff --git a/keras_hub/src/models/image_object_detector_preprocessor.py b/keras_hub/src/models/image_object_detector_preprocessor.py index 2f89d216ef..6ff6e2c21d 100644 --- a/keras_hub/src/models/image_object_detector_preprocessor.py +++ b/keras_hub/src/models/image_object_detector_preprocessor.py @@ -53,5 +53,5 @@ def __init__( @preprocessing_function def call(self, x, y=None, sample_weight=None): if self.image_converter: - x = self.image_converter(x) + x, y = self.image_converter(x, y) return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) diff --git a/keras_hub/src/models/retinanet/retinanet_image_converter.py b/keras_hub/src/models/retinanet/retinanet_image_converter.py index 6d26323a0a..279c5ef92d 100644 --- a/keras_hub/src/models/retinanet/retinanet_image_converter.py +++ b/keras_hub/src/models/retinanet/retinanet_image_converter.py @@ -1,3 +1,5 @@ +import keras + from keras_hub.src.api_export import keras_hub_export from keras_hub.src.layers.preprocessing.image_converter import ImageConverter from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone @@ -10,25 +12,38 @@ class 
RetinaNetImageConverter(ImageConverter): def __init__( self, - image_size=None, - scale=None, - offset=None, + bounding_box_format, + pad_to_aspect_ratio=False, norm_mean=[0.485, 0.456, 0.406], norm_std=[0.229, 0.224, 0.225], **kwargs, ): super().__init__(**kwargs) - self.image_size = image_size - self.scale = scale - self.offset = offset + self.resizing = keras.layers.Resizing( + height=self.image_size[0] if self.image_size else None, + width=self.image_size[1] if self.image_size else None, + bounding_box_format=bounding_box_format, + crop_to_aspect_ratio=self.crop_to_aspect_ratio, + pad_to_aspect_ratio=pad_to_aspect_ratio, + interpolation=self.interpolation, + data_format=self.data_format, + dtype=self.dtype_policy, + name="resizing", + ) + + self.bounding_box_format = bounding_box_format + self.pad_to_aspect_ratio = pad_to_aspect_ratio self.norm_mean = norm_mean self.norm_std = norm_std - self.built = True @preprocessing_function - def call(self, inputs): - # TODO: https://github.com/keras-team/keras-hub/issues/1965 - x = inputs + def call(self, x, y=None, sample_weight=None): + if y is not None: + inputs = self.resizing({"images": x, "bounding_boxes": y}) + x = inputs["images"] + y = inputs["bounding_boxes"] + else: + x = self.resizing(x) # Rescaling Image if self.scale is not None: x = x * self._expand_non_channel_dims(self.scale, x) @@ -40,12 +55,14 @@ def call(self, inputs): if self.norm_std: x = x / self._expand_non_channel_dims(self.norm_std, x) - return x + return x, y def get_config(self): config = super().get_config() config.update( { + "bounding_box_format": self.bounding_box_format, + "pad_to_aspect_ratio": self.pad_to_aspect_ratio, "norm_mean": self.norm_mean, "norm_std": self.norm_std, } diff --git a/keras_hub/src/models/retinanet/retinanet_label_encoder.py b/keras_hub/src/models/retinanet/retinanet_label_encoder.py index 66a6ff6d78..886d86422d 100644 --- a/keras_hub/src/models/retinanet/retinanet_label_encoder.py +++ b/keras_hub/src/models/retinanet/retinanet_label_encoder.py @@ -4,10 +4,7 @@ from keras import ops # TODO: https://github.com/keras-team/keras-hub/issues/1965 -from keras_hub.src.bounding_box.converters import convert_format -from keras_hub.src.bounding_box.converters import encode_box_to_deltas -from keras_hub.src.bounding_box.iou import compute_iou -from keras_hub.src.models.retinanet.box_matcher import BoxMatcher +from keras_hub.src.layers.modeling.box_matcher import BoxMatcher from keras_hub.src.utils import tensor_utils @@ -113,7 +110,7 @@ def call(self, images, gt_boxes, gt_classes): "support unbatched inputs for the `images` argument. " f"Received `shape(images)={images_shape}`." ) - image_shape = images_shape[1:] + height, width, _ = images_shape[1:] if len(ops.shape(gt_classes)) == 2: gt_classes = ops.expand_dims(gt_classes, axis=-1) @@ -122,14 +119,14 @@ def call(self, images, gt_boxes, gt_classes): anchor_boxes = ops.concatenate(list(anchor_boxes.values()), axis=0) box_targets, class_targets = self._encode_sample( - gt_boxes, gt_classes, anchor_boxes, image_shape + gt_boxes, gt_classes, anchor_boxes, height, width ) box_targets = ops.reshape( box_targets, (-1, ops.shape(box_targets)[1], 4) ) return box_targets, class_targets - def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, image_shape): + def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, height, width): """Creates box and classification targets for a batched sample. Matches ground truth boxes to anchor boxes based on IOU. 
@@ -149,23 +146,25 @@ def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, image_shape): anchor_boxes: A Tensor with the shape `[total_anchors, 4]` representing all the anchor boxes for a given input image shape, where each anchor box is of the format `[x, y, width, height]`. - image_shape: Tuple indicating the image shape `[H, W, C]`. + height: int. + width: int. Returns: Encoded bounding boxes in the format of `center_yxwh` and corresponding labels for each encoded bounding box. """ - anchor_boxes = convert_format( + anchor_boxes = keras.utils.bounding_boxes.convert_format( anchor_boxes, source=self.anchor_generator.bounding_box_format, target=self.bounding_box_format, - image_shape=image_shape, + height=height, + width=width, ) - iou_matrix = compute_iou( + iou_matrix = keras.utils.bounding_boxes.compute_iou( anchor_boxes, gt_boxes, bounding_box_format=self.bounding_box_format, - image_shape=image_shape, + image_shape=(height, width, 3), ) matched_gt_idx, matched_vals = self.box_matcher(iou_matrix) @@ -179,14 +178,14 @@ def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, image_shape): matched_gt_boxes, (-1, ops.shape(matched_gt_boxes)[1], 4) ) - box_targets = encode_box_to_deltas( + box_targets = keras.utils.bounding_boxes.encode_box_to_deltas( anchors=anchor_boxes, boxes=matched_gt_boxes, anchor_format=self.bounding_box_format, box_format=self.bounding_box_format, encoding_format=self.encoding_format, variance=self.box_variance, - image_shape=image_shape, + image_shape=(height, width, 3), ) matched_gt_cls_ids = tensor_utils.target_gather( diff --git a/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py b/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py index d05bf5a99a..ca4f151309 100644 --- a/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py +++ b/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py @@ -1,7 +1,7 @@ import numpy as np from keras import ops -from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.models.retinanet.retinanet_label_encoder import ( RetinaNetLabelEncoder, ) diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector.py b/keras_hub/src/models/retinanet/retinanet_object_detector.py index 14b3a631c5..d9523a3a58 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector.py @@ -2,13 +2,9 @@ from keras import ops from keras_hub.src.api_export import keras_hub_export - -# TODO: https://github.com/keras-team/keras-hub/issues/1965 -from keras_hub.src.bounding_box.converters import convert_format -from keras_hub.src.bounding_box.converters import decode_deltas_to_boxes +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator +from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression from keras_hub.src.models.image_object_detector import ImageObjectDetector -from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator -from keras_hub.src.models.retinanet.non_max_supression import NonMaxSuppression from keras_hub.src.models.retinanet.prediction_head import PredictionHead from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_label_encoder import ( @@ -204,17 +200,19 @@ def __init__( ) def compute_loss(self, x, y, y_pred, sample_weight, **kwargs): - y_for_label_encoder = 
convert_format( + _, height, width, _ = keras.ops.shape(x) + y_for_label_encoder = keras.utils.bounding_boxes.convert_format( y, source=self.bounding_box_format, target=self.label_encoder.bounding_box_format, - images=x, + height=height, + width=width, ) - boxes, classes = self.label_encoder( + boxes, labels = self.label_encoder( images=x, gt_boxes=y_for_label_encoder["boxes"], - gt_classes=y_for_label_encoder["classes"], + gt_classes=y_for_label_encoder["labels"], ) box_pred = y_pred["bbox_regression"] @@ -242,11 +240,11 @@ def compute_loss(self, x, y, y_pred, sample_weight, **kwargs): ) cls_labels = ops.one_hot( - ops.cast(classes, "int32"), self.num_classes, dtype="float32" + ops.cast(labels, "int32"), self.num_classes, dtype="float32" ) - positive_mask = ops.cast(ops.greater(classes, -1.0), dtype="float32") + positive_mask = ops.cast(ops.greater(labels, -1.0), dtype="float32") normalizer = ops.sum(positive_mask) - cls_weights = ops.cast(ops.not_equal(classes, -2.0), dtype="float32") + cls_weights = ops.cast(ops.not_equal(labels, -2.0), dtype="float32") cls_weights /= normalizer box_weights = positive_mask / normalizer @@ -306,32 +304,32 @@ def decode_predictions(self, predictions, data): images, _ = data else: images = data - image_shape = ops.shape(images)[1:] + height, width, channels = ops.shape(images)[1:] anchor_boxes = self.anchor_generator(images) anchor_boxes = ops.concatenate(list(anchor_boxes.values()), axis=0) - box_pred = decode_deltas_to_boxes( + box_pred = keras.utils.bounding_boxes.decode_deltas_to_boxes( anchors=anchor_boxes, boxes_delta=box_pred, encoded_format="center_xywh", anchor_format=self.anchor_generator.bounding_box_format, box_format=self.bounding_box_format, - image_shape=image_shape, + image_shape=(height, width, channels), ) # box_pred is now in "self.bounding_box_format" format - box_pred = convert_format( + box_pred = keras.utils.bounding_boxes.convert_format( box_pred, source=self.bounding_box_format, target=self.prediction_decoder.bounding_box_format, - image_shape=image_shape, - ) - y_pred = self.prediction_decoder( - box_pred, cls_pred, image_shape=image_shape + height=height, + width=width, ) - y_pred["boxes"] = convert_format( + y_pred = self.prediction_decoder(box_pred, cls_pred, images=images) + y_pred["boxes"] = keras.utils.bounding_boxes.convert_format( y_pred["boxes"], source=self.prediction_decoder.bounding_box_format, target=self.bounding_box_format, - image_shape=image_shape, + height=height, + width=width, ) return y_pred diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py index 53d7461bb1..38e917c4a7 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py @@ -1,8 +1,8 @@ import numpy as np import pytest +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone -from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_image_converter import ( RetinaNetImageConverter, @@ -53,7 +53,9 @@ def setUp(self): bounding_box_format="yxyx", anchor_generator=anchor_generator ) - image_converter = RetinaNetImageConverter(scale=1 / 255.0) + image_converter = RetinaNetImageConverter( + bounding_box_format="yxyx", scale=1 / 255.0, 
image_size=(800, 800) + ) preprocessor = RetinaNetObjectDetectorPreprocessor( image_converter=image_converter @@ -76,7 +78,7 @@ def setUp(self): "boxes": np.array( [[[20.0, 10.0, 12.0, 11.0], [30.0, 20.0, 40.0, 12.0]]] ), - "classes": np.array([[0, 2]]), + "labels": np.array([[0, 2]]), } self.train_data = (self.images, self.labels) @@ -87,7 +89,7 @@ def test_detection_basics(self): train_data=self.train_data, expected_output_shape={ "boxes": (1, 100, 4), - "classes": (1, 100), + "labels": (1, 100), "confidence": (1, 100), "num_detections": (1,), }, From 43e45917e4b651689c0a271500afdce27be2bd7b Mon Sep 17 00:00:00 2001 From: Sravana Neeli Date: Mon, 13 Jan 2025 15:07:19 -0800 Subject: [PATCH 02/15] Remove bbox utils from KerasHub, adding proper docstrings --- keras_hub/api/__init__.py | 1 - keras_hub/api/bounding_box/__init__.py | 23 - keras_hub/src/bounding_box/__init__.py | 2 - keras_hub/src/bounding_box/converters.py | 606 ------------------ keras_hub/src/bounding_box/converters_test.py | 351 ---------- keras_hub/src/bounding_box/formats.py | 149 ----- keras_hub/src/bounding_box/iou.py | 251 -------- keras_hub/src/bounding_box/iou_test.py | 148 ----- keras_hub/src/bounding_box/to_dense.py | 81 --- keras_hub/src/bounding_box/to_dense_test.py | 23 - keras_hub/src/bounding_box/to_ragged.py | 86 --- keras_hub/src/bounding_box/to_ragged_test.py | 87 --- keras_hub/src/bounding_box/utils.py | 181 ------ keras_hub/src/bounding_box/utils_test.py | 155 ----- keras_hub/src/bounding_box/validate_format.py | 85 --- .../src/bounding_box/validate_format_test.py | 34 - .../src/layers/modeling/anchor_generator.py | 6 +- keras_hub/src/layers/modeling/box_matcher.py | 14 +- .../src/layers/modeling/non_max_supression.py | 22 + 19 files changed, 37 insertions(+), 2268 deletions(-) delete mode 100644 keras_hub/api/bounding_box/__init__.py delete mode 100644 keras_hub/src/bounding_box/__init__.py delete mode 100644 keras_hub/src/bounding_box/converters.py delete mode 100644 keras_hub/src/bounding_box/converters_test.py delete mode 100644 keras_hub/src/bounding_box/formats.py delete mode 100644 keras_hub/src/bounding_box/iou.py delete mode 100644 keras_hub/src/bounding_box/iou_test.py delete mode 100644 keras_hub/src/bounding_box/to_dense.py delete mode 100644 keras_hub/src/bounding_box/to_dense_test.py delete mode 100644 keras_hub/src/bounding_box/to_ragged.py delete mode 100644 keras_hub/src/bounding_box/to_ragged_test.py delete mode 100644 keras_hub/src/bounding_box/utils.py delete mode 100644 keras_hub/src/bounding_box/utils_test.py delete mode 100644 keras_hub/src/bounding_box/validate_format.py delete mode 100644 keras_hub/src/bounding_box/validate_format_test.py diff --git a/keras_hub/api/__init__.py b/keras_hub/api/__init__.py index 7e44f01381..fa8636ab70 100644 --- a/keras_hub/api/__init__.py +++ b/keras_hub/api/__init__.py @@ -4,7 +4,6 @@ since your modifications would be overwritten. """ -from keras_hub.api import bounding_box from keras_hub.api import layers from keras_hub.api import metrics from keras_hub.api import models diff --git a/keras_hub/api/bounding_box/__init__.py b/keras_hub/api/bounding_box/__init__.py deleted file mode 100644 index dfdea4305c..0000000000 --- a/keras_hub/api/bounding_box/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -"""DO NOT EDIT. - -This file was autogenerated. Do not edit it by hand, -since your modifications would be overwritten. 
-""" - -from keras_hub.src.bounding_box.converters import convert_format -from keras_hub.src.bounding_box.formats import CENTER_XYWH -from keras_hub.src.bounding_box.formats import REL_XYWH -from keras_hub.src.bounding_box.formats import REL_XYXY -from keras_hub.src.bounding_box.formats import REL_YXYX -from keras_hub.src.bounding_box.formats import XYWH -from keras_hub.src.bounding_box.formats import XYXY -from keras_hub.src.bounding_box.formats import YXYX -from keras_hub.src.bounding_box.iou import compute_ciou -from keras_hub.src.bounding_box.iou import compute_iou -from keras_hub.src.bounding_box.to_dense import to_dense -from keras_hub.src.bounding_box.to_ragged import to_ragged -from keras_hub.src.bounding_box.utils import as_relative -from keras_hub.src.bounding_box.utils import clip_boxes -from keras_hub.src.bounding_box.utils import clip_to_image -from keras_hub.src.bounding_box.utils import is_relative -from keras_hub.src.bounding_box.validate_format import validate_format diff --git a/keras_hub/src/bounding_box/__init__.py b/keras_hub/src/bounding_box/__init__.py deleted file mode 100644 index 78f451fd0d..0000000000 --- a/keras_hub/src/bounding_box/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# TODO: Once all bounding boxes are moved to keras repostory remove the -# bounding box folder. diff --git a/keras_hub/src/bounding_box/converters.py b/keras_hub/src/bounding_box/converters.py deleted file mode 100644 index 7c347a9815..0000000000 --- a/keras_hub/src/bounding_box/converters.py +++ /dev/null @@ -1,606 +0,0 @@ -"""Converter functions for working with bounding box formats.""" - -import keras -from keras import ops - -from keras_hub.src.api_export import keras_hub_export - -try: - import tensorflow as tf -except ImportError: - tf = None - - -# Internal exception to propagate the fact images was not passed to a converter -# that needs it. -class RequiresImagesException(Exception): - pass - - -ALL_AXES = 4 - - -def encode_box_to_deltas( - anchors, - boxes, - anchor_format, - box_format, - encoding_format="center_yxhw", - variance=None, - image_shape=None, -): - """Encodes bounding boxes relative to anchors as deltas. - - This function calculates the deltas that represent the difference between - bounding boxes and provided anchors. Deltas encode the offsets and scaling - factors to apply to anchors to obtain the target boxes. - - Boxes and anchors are first converted to the specified `encoding_format` - (defaulting to `center_yxhw`) for consistent delta representation. - - Args: - anchors: `Tensors`. Anchor boxes with shape of `(N, 4)` where N is the - number of anchors. - boxes: `Tensors` Bounding boxes to encode. Boxes can be of shape - `(B, N, 4)` or `(N, 4)`. - anchor_format: str. The format of the input `anchors` - (e.g., "xyxy", "xywh", etc.). - box_format: str. The format of the input `boxes` - (e.g., "xyxy", "xywh", etc.). - encoding_format: str. The intermediate format to which boxes and anchors - are converted before delta calculation. Defaults to "center_yxhw". - variance: `List[float]`. A 4-element array/tensor representing variance - factors to scale the box deltas. If provided, the calculated deltas - are divided by the variance. Defaults to None. - image_shape: `Tuple[int]`. The shape of the image (height, width, 3). - When using relative bounding box format for `box_format` the - `image_shape` is used for normalization. - Returns: - Encoded box deltas. The return type matches the `encode_format`. 
- - Raises: - ValueError: If `variance` is not None and its length is not 4. - ValueError: If `encoding_format` is not `"center_xywh"` or - `"center_yxhw"`. - - """ - if variance is not None: - variance = ops.convert_to_tensor(variance, "float32") - var_len = variance.shape[-1] - - if var_len != 4: - raise ValueError(f"`variance` must be length 4, got {variance}") - - if encoding_format not in ["center_xywh", "center_yxhw"]: - raise ValueError( - "`encoding_format` should be one of 'center_xywh' or " - f"'center_yxhw', got {encoding_format}" - ) - - encoded_anchors = convert_format( - anchors, - source=anchor_format, - target=encoding_format, - image_shape=image_shape, - ) - boxes = convert_format( - boxes, - source=box_format, - target=encoding_format, - image_shape=image_shape, - ) - anchor_dimensions = ops.maximum( - encoded_anchors[..., 2:], keras.backend.epsilon() - ) - box_dimensions = ops.maximum(boxes[..., 2:], keras.backend.epsilon()) - # anchors be unbatched, boxes can either be batched or unbatched. - boxes_delta = ops.concatenate( - [ - (boxes[..., :2] - encoded_anchors[..., :2]) / anchor_dimensions, - ops.log(box_dimensions / anchor_dimensions), - ], - axis=-1, - ) - if variance is not None: - boxes_delta /= variance - return boxes_delta - - -def decode_deltas_to_boxes( - anchors, - boxes_delta, - anchor_format, - box_format, - encoded_format="center_yxhw", - variance=None, - image_shape=None, -): - """Converts bounding boxes from delta format to the specified `box_format`. - - This function decodes bounding box deltas relative to anchors to obtain the - final bounding box coordinates. The boxes are encoded in a specific - `encoded_format` (center_yxhw by default) during the decoding process. - This allows flexibility in how the deltas are applied to the anchors. - - Args: - anchors: Can be `Tensors` or `Dict[Tensors]` where keys are level - indices and values are corresponding anchor boxes. - The shape of the array/tensor should be `(N, 4)` where N is the - number of anchors. - boxes_delta Can be `Tensors` or `Dict[Tensors]` Bounding box deltas - must have the same type and structure as `anchors`. The - shape of the array/tensor can be `(N, 4)` or `(B, N, 4)` where N is - the number of boxes. - anchor_format: str. The format of the input `anchors`. - (e.g., `"xyxy"`, `"xywh"`, etc.) - box_format: str. The desired format for the output boxes. - (e.g., `"xyxy"`, `"xywh"`, etc.) - encoded_format: str. Raw output format from regression head. Defaults - to `"center_yxhw"`. - variance: `List[floats]`. A 4-element array/tensor representing - variance factors to scale the box deltas. If provided, the deltas - are multiplied by the variance before being applied to the anchors. - Defaults to None. - image_shape: The shape of the image (height, width). This is needed - if normalization to image size is required when converting between - formats. Defaults to None. - - Returns: - Decoded box coordinates. The return type matches the `box_format`. - - Raises: - ValueError: If `variance` is not None and its length is not 4. - ValueError: If `encoded_format` is not `"center_xywh"` or - `"center_yxhw"`. - - """ - if variance is not None: - variance = ops.convert_to_tensor(variance, "float32") - var_len = variance.shape[-1] - - if var_len != 4: - raise ValueError(f"`variance` must be length 4, got {variance}") - - if encoded_format not in ["center_xywh", "center_yxhw"]: - raise ValueError( - f"`encoded_format` should be 'center_xywh' or 'center_yxhw', " - f"but got '{encoded_format}'." 
- ) - - def decode_single_level(anchor, box_delta): - encoded_anchor = convert_format( - anchor, - source=anchor_format, - target=encoded_format, - image_shape=image_shape, - ) - if variance is not None: - box_delta = box_delta * variance - # anchors be unbatched, boxes can either be batched or unbatched. - box = ops.concatenate( - [ - box_delta[..., :2] * encoded_anchor[..., 2:] - + encoded_anchor[..., :2], - ops.exp(box_delta[..., 2:]) * encoded_anchor[..., 2:], - ], - axis=-1, - ) - box = convert_format( - box, - source=encoded_format, - target=box_format, - image_shape=image_shape, - ) - return box - - if isinstance(anchors, dict) and isinstance(boxes_delta, dict): - boxes = {} - for lvl, anchor in anchors.items(): - boxes[lvl] = decode_single_level(anchor, boxes_delta[lvl]) - return boxes - else: - return decode_single_level(anchors, boxes_delta) - - -def _center_yxhw_to_xyxy(boxes, images=None, image_shape=None): - y, x, height, width = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [x - width / 2.0, y - height / 2.0, x + width / 2.0, y + height / 2.0], - axis=-1, - ) - - -def _center_xywh_to_xyxy(boxes, images=None, image_shape=None): - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [x - width / 2.0, y - height / 2.0, x + width / 2.0, y + height / 2.0], - axis=-1, - ) - - -def _xywh_to_xyxy(boxes, images=None, image_shape=None): - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([x, y, x + width, y + height], axis=-1) - - -def _xyxy_to_center_yxhw(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - (top + bottom) / 2.0, - (left + right) / 2.0, - bottom - top, - right - left, - ], - axis=-1, - ) - - -def _rel_xywh_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - image_width * x, - image_height * y, - image_width * (x + width), - image_height * (y + height), - ], - axis=-1, - ) - - -def _xyxy_no_op(boxes, images=None, image_shape=None): - return boxes - - -def _xyxy_to_xywh(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [left, top, right - left, bottom - top], - axis=-1, - ) - - -def _xyxy_to_rel_xywh(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - left, right = ( - left / image_width, - right / image_width, - ) - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [left, top, right - left, bottom - top], - axis=-1, - ) - - -def _xyxy_to_center_xywh(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - (left + right) / 2.0, - (top + bottom) / 2.0, - right - left, - bottom - top, - ], - axis=-1, - ) - - -def _rel_xyxy_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left * image_width, right * image_width - top, bottom = top * image_height, bottom * image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _xyxy_to_rel_xyxy(boxes, images=None, 
image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left / image_width, right / image_width - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _yxyx_to_xyxy(boxes, images=None, image_shape=None): - y1, x1, y2, x2 = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([x1, y1, x2, y2], axis=-1) - - -def _rel_yxyx_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - top, left, bottom, right = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left * image_width, right * image_width - top, bottom = top * image_height, bottom * image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _xyxy_to_yxyx(boxes, images=None, image_shape=None): - x1, y1, x2, y2 = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([y1, x1, y2, x2], axis=-1) - - -def _xyxy_to_rel_yxyx(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - left, right = left / image_width, right / image_width - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [top, left, bottom, right], - axis=-1, - ) - - -TO_XYXY_CONVERTERS = { - "xywh": _xywh_to_xyxy, - "center_xywh": _center_xywh_to_xyxy, - "center_yxhw": _center_yxhw_to_xyxy, - "rel_xywh": _rel_xywh_to_xyxy, - "xyxy": _xyxy_no_op, - "rel_xyxy": _rel_xyxy_to_xyxy, - "yxyx": _yxyx_to_xyxy, - "rel_yxyx": _rel_yxyx_to_xyxy, -} - -FROM_XYXY_CONVERTERS = { - "xywh": _xyxy_to_xywh, - "center_xywh": _xyxy_to_center_xywh, - "center_yxhw": _xyxy_to_center_yxhw, - "rel_xywh": _xyxy_to_rel_xywh, - "xyxy": _xyxy_no_op, - "rel_xyxy": _xyxy_to_rel_xyxy, - "yxyx": _xyxy_to_yxyx, - "rel_yxyx": _xyxy_to_rel_yxyx, -} - - -@keras_hub_export("keras_hub.bounding_box.convert_format") -def convert_format( - boxes, source, target, images=None, image_shape=None, dtype="float32" -): - f"""Converts bounding_boxes from one format to another. - - Supported formats are: - - `"xyxy"`, also known as `corners` format. In this format the first four - axes represent `[left, top, right, bottom]` in that order. - - `"rel_xyxy"`. In this format, the axes are the same as `"xyxy"` but the x - coordinates are normalized using the image width, and the y axes the - image height. All values in `rel_xyxy` are in the range `(0, 1)`. - - `"xywh"`. In this format the first four axes represent - `[left, top, width, height]`. - - `"rel_xywh". In this format the first four axes represent - [left, top, width, height], just like `"xywh"`. Unlike `"xywh"`, the - values are in the range (0, 1) instead of absolute pixel values. - - `"center_xyWH"`. In this format the first two coordinates represent the x - and y coordinates of the center of the bounding box, while the last two - represent the width and height of the bounding box. - - `"center_yxHW"`. In this format the first two coordinates represent the y - and x coordinates of the center of the bounding box, while the last two - represent the height and width of the bounding box. - - `"yxyx"`. In this format the first four axes represent - [top, left, bottom, right] in that order. - - `"rel_yxyx"`. 
In this format, the axes are the same as `"yxyx"` but the x - coordinates are normalized using the image width, and the y axes the - image height. All values in `rel_yxyx` are in the range (0, 1). - Formats are case insensitive. It is recommended that you capitalize width - and height to maximize the visual difference between `"xyWH"` and `"xyxy"`. - - Relative formats, abbreviated `rel`, make use of the shapes of the `images` - passed. In these formats, the coordinates, widths, and heights are all - specified as percentages of the host image. `images` may be a ragged - Tensor. Note that using a ragged Tensor for images may cause a substantial - performance loss, as each image will need to be processed separately due to - the mismatching image shapes. - - Example: - - ```python - boxes = load_coco_dataset() - boxes_in_xywh = keras_hub.bounding_box.convert_format( - boxes, - source='xyxy', - target='xyWH' - ) - ``` - - Args: - boxes: tensor representing bounding boxes in the format specified in - the `source` parameter. `boxes` can optionally have extra - dimensions stacked on the final axis to store metadata. boxes - should be a 3D tensor, with the shape `[batch_size, num_boxes, 4]`. - Alternatively, boxes can be a dictionary with key 'boxes' containing - a tensor matching the aforementioned spec. - source:One of {" ".join([f'"{f}"' for f in TO_XYXY_CONVERTERS.keys()])}. - Used to specify the original format of the `boxes` parameter. - target:One of {" ".join([f'"{f}"' for f in TO_XYXY_CONVERTERS.keys()])}. - Used to specify the destination format of the `boxes` parameter. - images: (Optional) a batch of images aligned with `boxes` on the first - axis. Should be at least 3 dimensions, with the first 3 dimensions - representing: `[batch_size, height, width]`. Used in some - converters to compute relative pixel values of the bounding box - dimensions. Required when transforming from a rel format to a - non-rel format. - dtype: the data type to use when transforming the boxes, defaults to - `"float32"`. - """ - if isinstance(boxes, dict): - converted_boxes = boxes.copy() - converted_boxes["boxes"] = convert_format( - boxes["boxes"], - source=source, - target=target, - images=images, - image_shape=image_shape, - dtype=dtype, - ) - return converted_boxes - - if boxes.shape[-1] is not None and boxes.shape[-1] != 4: - raise ValueError( - "Expected `boxes` to be a Tensor with a final dimension of " - f"`4`. Instead, got `boxes.shape={boxes.shape}`." - ) - if images is not None and image_shape is not None: - raise ValueError( - "convert_format() expects either `images` or `image_shape`, but " - f"not both. Received images={images} image_shape={image_shape}" - ) - - _validate_image_shape(image_shape) - - source = source.lower() - target = target.lower() - if source not in TO_XYXY_CONVERTERS: - raise ValueError( - "`convert_format()` received an unsupported format for the " - "argument `source`. `source` should be one of " - f"{TO_XYXY_CONVERTERS.keys()}. Got source={source}" - ) - if target not in FROM_XYXY_CONVERTERS: - raise ValueError( - "`convert_format()` received an unsupported format for the " - "argument `target`. `target` should be one of " - f"{FROM_XYXY_CONVERTERS.keys()}. 
Got target={target}" - ) - - boxes = ops.cast(boxes, dtype) - if source == target: - return boxes - - # rel->rel conversions should not require images - if source.startswith("rel") and target.startswith("rel"): - source = source.replace("rel_", "", 1) - target = target.replace("rel_", "", 1) - - boxes, images, squeeze = _format_inputs(boxes, images) - to_xyxy_fn = TO_XYXY_CONVERTERS[source] - from_xyxy_fn = FROM_XYXY_CONVERTERS[target] - - try: - in_xyxy = to_xyxy_fn(boxes, images=images, image_shape=image_shape) - result = from_xyxy_fn(in_xyxy, images=images, image_shape=image_shape) - except RequiresImagesException: - raise ValueError( - "convert_format() must receive `images` or `image_shape` when " - "transforming between relative and absolute formats." - f"convert_format() received source=`{format}`, target=`{format}, " - f"but images={images} and image_shape={image_shape}." - ) - - return _format_outputs(result, squeeze) - - -def _format_inputs(boxes, images): - boxes_rank = len(boxes.shape) - if boxes_rank > 3: - raise ValueError( - "Expected len(boxes.shape)=2, or len(boxes.shape)=3, got " - f"len(boxes.shape)={boxes_rank}" - ) - boxes_includes_batch = boxes_rank == 3 - # Determine if images needs an expand_dims() call - if images is not None: - images_rank = len(images.shape) - if images_rank > 4: - raise ValueError( - "Expected len(images.shape)=2, or len(images.shape)=3, got " - f"len(images.shape)={images_rank}" - ) - images_include_batch = images_rank == 4 - if boxes_includes_batch != images_include_batch: - raise ValueError( - "convert_format() expects both boxes and images to be batched, " - "or both boxes and images to be unbatched. Received " - f"len(boxes.shape)={boxes_rank}, " - f"len(images.shape)={images_rank}. Expected either " - "len(boxes.shape)=2 AND len(images.shape)=3, or " - "len(boxes.shape)=3 AND len(images.shape)=4." - ) - if not images_include_batch: - images = ops.expand_dims(images, axis=0) - - if not boxes_includes_batch: - return ops.expand_dims(boxes, axis=0), images, True - return boxes, images, False - - -def _validate_image_shape(image_shape): - # Escape early if image_shape is None and skip validation. - if image_shape is None: - return - # tuple/list - if isinstance(image_shape, (tuple, list)): - if len(image_shape) != 3: - raise ValueError( - "image_shape should be of length 3, but got " - f"image_shape={image_shape}" - ) - return - - # tensor - if ops.is_tensor(image_shape): - if len(image_shape.shape) > 1: - raise ValueError( - "image_shape.shape should be (3), but got " - f"image_shape.shape={image_shape.shape}" - ) - if image_shape.shape[0] != 3: - raise ValueError( - "image_shape.shape should be (3), but got " - f"image_shape.shape={image_shape.shape}" - ) - return - - # Warn about failure cases - raise ValueError( - "Expected image_shape to be either a tuple, list, Tensor. 
" - f"Received image_shape={image_shape}" - ) - - -def _format_outputs(boxes, squeeze): - if squeeze: - return ops.squeeze(boxes, axis=0) - return boxes - - -def _image_shape(images, image_shape, boxes): - if images is None and image_shape is None: - raise RequiresImagesException() - - if image_shape is None: - if not isinstance(images, tf.RaggedTensor): - image_shape = ops.shape(images) - height, width = image_shape[1], image_shape[2] - else: - height = ops.reshape(images.row_lengths(), (-1, 1)) - width = ops.reshape(ops.max(images.row_lengths(axis=2), 1), (-1, 1)) - height = ops.expand_dims(height, axis=-1) - width = ops.expand_dims(width, axis=-1) - else: - height, width = image_shape[0], image_shape[1] - return ops.cast(height, boxes.dtype), ops.cast(width, boxes.dtype) diff --git a/keras_hub/src/bounding_box/converters_test.py b/keras_hub/src/bounding_box/converters_test.py deleted file mode 100644 index 9617a2a2aa..0000000000 --- a/keras_hub/src/bounding_box/converters_test.py +++ /dev/null @@ -1,351 +0,0 @@ -import itertools - -import numpy as np -import pytest -import tensorflow as tf -from absl.testing import parameterized -from keras import backend - -from keras_hub.src.bounding_box import converters -from keras_hub.src.bounding_box import to_dense -from keras_hub.src.bounding_box import to_ragged -from keras_hub.src.tests.test_case import TestCase - - -class ConvertersTestCase(TestCase): - def setUp(self): - xyxy_box = np.array( - [[[10, 20, 110, 120], [20, 30, 120, 130]]], dtype="float32" - ) - yxyx_box = np.array( - [[[20, 10, 120, 110], [30, 20, 130, 120]]], dtype="float32" - ) - rel_xyxy_box = np.array( - [[[0.01, 0.02, 0.11, 0.12], [0.02, 0.03, 0.12, 0.13]]], - dtype="float32", - ) - rel_xyxy_box_ragged_images = np.array( - [[[0.10, 0.20, 1.1, 1.20], [0.40, 0.6, 2.40, 2.6]]], dtype="float32" - ) - rel_yxyx_box = np.array( - [[[0.02, 0.01, 0.12, 0.11], [0.03, 0.02, 0.13, 0.12]]], - dtype="float32", - ) - rel_yxyx_box_ragged_images = np.array( - [[[0.2, 0.1, 1.2, 1.1], [0.6, 0.4, 2.6, 2.4]]], dtype="float32" - ) - center_xywh_box = np.array( - [[[60, 70, 100, 100], [70, 80, 100, 100]]], dtype="float32" - ) - xywh_box = np.array( - [[[10, 20, 100, 100], [20, 30, 100, 100]]], dtype="float32" - ) - rel_xywh_box = np.array( - [[[0.01, 0.02, 0.1, 0.1], [0.02, 0.03, 0.1, 0.1]]], dtype="float32" - ) - rel_xywh_box_ragged_images = np.array( - [[[0.1, 0.2, 1, 1], [0.4, 0.6, 2, 2]]], dtype="float32" - ) - - self.ragged_images = tf.ragged.constant( - [ - np.ones(shape=[100, 100, 3]), - np.ones(shape=[50, 50, 3]), - ], # 2 images - ragged_rank=2, - ) - - self.images = np.ones([2, 1000, 1000, 3]) - - self.ragged_classes = tf.ragged.constant([[0], [0]], dtype="float32") - - self.boxes = { - "xyxy": xyxy_box, - "center_xywh": center_xywh_box, - "rel_xywh": rel_xywh_box, - "xywh": xywh_box, - "rel_xyxy": rel_xyxy_box, - "yxyx": yxyx_box, - "rel_yxyx": rel_yxyx_box, - } - - self.boxes_ragged_images = { - "xyxy": xyxy_box, - "center_xywh": center_xywh_box, - "rel_xywh": rel_xywh_box_ragged_images, - "xywh": xywh_box, - "rel_xyxy": rel_xyxy_box_ragged_images, - "yxyx": yxyx_box, - "rel_yxyx": rel_yxyx_box_ragged_images, - } - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - def test_converters(self, source, target): - source, target - source_box = 
self.boxes[source] - target_box = self.boxes[target] - - self.assertAllClose( - converters.convert_format( - source_box, source=source, target=target, images=self.images - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_converters_ragged_images(self, source, target): - source_box = _raggify(self.boxes_ragged_images[source]) - target_box = _raggify(self.boxes_ragged_images[target]) - self.assertAllClose( - converters.convert_format( - source_box, - source=source, - target=target, - images=self.ragged_images, - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - def test_converters_unbatched(self, source, target): - source_box = self.boxes[source][0] - target_box = self.boxes[target][0] - - self.assertAllClose( - converters.convert_format( - source_box, source=source, target=target, images=self.images[0] - ), - target_box, - ) - - def test_raises_with_different_image_rank(self): - source_box = self.boxes["xyxy"][0] - with self.assertRaises(ValueError): - converters.convert_format( - source_box, source="xyxy", target="xywh", images=self.images - ) - - def test_without_images(self): - source_box = self.boxes["xyxy"] - target_box = self.boxes["xywh"] - self.assertAllClose( - converters.convert_format(source_box, source="xyxy", target="xywh"), - target_box, - ) - - def test_rel_to_rel_without_images(self): - source_box = self.boxes["rel_xyxy"] - target_box = self.boxes["rel_yxyx"] - self.assertAllClose( - converters.convert_format( - source_box, source="rel_xyxy", target="rel_yxyx" - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_ragged_bounding_box(self, source, target): - source_box = _raggify(self.boxes[source]) - target_box = _raggify(self.boxes[target]) - self.assertAllClose( - converters.convert_format( - source_box, source=source, target=target, images=self.images - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_ragged_bounding_box_ragged_images(self, source, target): - source_box = _raggify(self.boxes_ragged_images[source]) - target_box = _raggify(self.boxes_ragged_images[target]) - self.assertAllClose( - converters.convert_format( - source_box, - source=source, - target=target, - 
images=self.ragged_images, - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_ragged_bounding_box_with_image_shape(self, source, target): - source_box = _raggify(self.boxes[source]) - target_box = _raggify(self.boxes[target]) - self.assertAllClose( - converters.convert_format( - source_box, - source=source, - target=target, - image_shape=(1000, 1000, 3), - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_dense_bounding_box_with_ragged_images(self, source, target): - source_box = _raggify(self.boxes_ragged_images[source]) - target_box = _raggify(self.boxes_ragged_images[target]) - source_bounding_boxes = { - "boxes": source_box, - "classes": self.ragged_classes, - } - source_bounding_boxes = to_dense.to_dense(source_bounding_boxes) - - result_bounding_boxes = converters.convert_format( - source_bounding_boxes, - source=source, - target=target, - images=self.ragged_images, - ) - result_bounding_boxes = to_ragged.to_ragged(result_bounding_boxes) - - self.assertAllClose( - result_bounding_boxes["boxes"], - target_box, - ) - - -def _raggify(tensor): - tensor = tf.squeeze(tensor, axis=0) - tensor = tf.RaggedTensor.from_row_lengths(tensor, [1, 1]) - return tensor diff --git a/keras_hub/src/bounding_box/formats.py b/keras_hub/src/bounding_box/formats.py deleted file mode 100644 index c8e50ab60a..0000000000 --- a/keras_hub/src/bounding_box/formats.py +++ /dev/null @@ -1,149 +0,0 @@ -""" -formats.py contains axis information for each supported format. -""" - -from keras_hub.src.api_export import keras_hub_export - - -@keras_hub_export("keras_hub.bounding_box.XYXY") -class XYXY: - """XYXY contains axis indices for the XYXY format. - - All values in the XYXY format should be absolute pixel values. - - The XYXY format consists of the following required indices: - - - LEFT: left of the bounding box - - TOP: top of the bounding box - - RIGHT: right of the bounding box - - BOTTOM: bottom of the bounding box - """ - - LEFT = 0 - TOP = 1 - RIGHT = 2 - BOTTOM = 3 - - -@keras_hub_export("keras_hub.bounding_box.REL_XYXY") -class REL_XYXY: - """REL_XYXY contains axis indices for the REL_XYXY format. - - REL_XYXY is like XYXY, but each value is relative to the width and height of - the origin image. Values are percentages of the origin images' width and - height respectively. - - The REL_XYXY format consists of the following required indices: - - - LEFT: left of the bounding box - - TOP: top of the bounding box - - RIGHT: right of the bounding box - - BOTTOM: bottom of the bounding box - """ - - LEFT = 0 - TOP = 1 - RIGHT = 2 - BOTTOM = 3 - - -@keras_hub_export("keras_hub.bounding_box.CENTER_XYWH") -class CENTER_XYWH: - """CENTER_XYWH contains axis indices for the CENTER_XYWH format. 
- - All values in the CENTER_XYWH format should be absolute pixel values. - - The CENTER_XYWH format consists of the following required indices: - - - X: X coordinate of the center of the bounding box - - Y: Y coordinate of the center of the bounding box - - WIDTH: width of the bounding box - - HEIGHT: height of the bounding box - """ - - X = 0 - Y = 1 - WIDTH = 2 - HEIGHT = 3 - - -@keras_hub_export("keras_hub.bounding_box.XYWH") -class XYWH: - """XYWH contains axis indices for the XYWH format. - - All values in the XYWH format should be absolute pixel values. - - The XYWH format consists of the following required indices: - - - X: X coordinate of the left of the bounding box - - Y: Y coordinate of the top of the bounding box - - WIDTH: width of the bounding box - - HEIGHT: height of the bounding box - """ - - X = 0 - Y = 1 - WIDTH = 2 - HEIGHT = 3 - - -@keras_hub_export("keras_hub.bounding_box.REL_XYWH") -class REL_XYWH: - """REL_XYWH contains axis indices for the XYWH format. - - REL_XYXY is like XYWH, but each value is relative to the width and height of - the origin image. Values are percentages of the origin images' width and - height respectively. - - - X: X coordinate of the left of the bounding box - - Y: Y coordinate of the top of the bounding box - - WIDTH: width of the bounding box - - HEIGHT: height of the bounding box - """ - - X = 0 - Y = 1 - WIDTH = 2 - HEIGHT = 3 - - -@keras_hub_export("keras_hub.bounding_box.YXYX") -class YXYX: - """YXYX contains axis indices for the YXYX format. - - All values in the YXYX format should be absolute pixel values. - - The YXYX format consists of the following required indices: - - - TOP: top of the bounding box - - LEFT: left of the bounding box - - BOTTOM: bottom of the bounding box - - RIGHT: right of the bounding box - """ - - TOP = 0 - LEFT = 1 - BOTTOM = 2 - RIGHT = 3 - - -@keras_hub_export("keras_hub.bounding_box.REL_YXYX") -class REL_YXYX: - """REL_YXYX contains axis indices for the REL_YXYX format. - - REL_YXYX is like YXYX, but each value is relative to the width and height of - the origin image. Values are percentages of the origin images' width and - height respectively. - - The REL_YXYX format consists of the following required indices: - - - TOP: top of the bounding box - - LEFT: left of the bounding box - - BOTTOM: bottom of the bounding box - - RIGHT: right of the bounding box - """ - - TOP = 0 - LEFT = 1 - BOTTOM = 2 - RIGHT = 3 diff --git a/keras_hub/src/bounding_box/iou.py b/keras_hub/src/bounding_box/iou.py deleted file mode 100644 index df2c907e4a..0000000000 --- a/keras_hub/src/bounding_box/iou.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Contains functions to compute ious of bounding boxes.""" - -import math - -import keras -from keras import ops - -from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.bounding_box.converters import convert_format -from keras_hub.src.bounding_box.utils import as_relative -from keras_hub.src.bounding_box.utils import is_relative - - -def _compute_area(box): - """Computes area for bounding boxes - - Args: - box: [N, 4] or [batch_size, N, 4] float Tensor, either batched - or unbatched boxes. - Returns: - a float Tensor of [N] or [batch_size, N] - """ - y_min, x_min, y_max, x_max = ops.split(box[..., :4], 4, axis=-1) - return ops.squeeze((y_max - y_min) * (x_max - x_min), axis=-1) - - -def _compute_intersection(boxes1, boxes2): - """Computes intersection area between two sets of boxes. - - Args: - boxes1: [N, 4] or [batch_size, N, 4] float Tensor boxes. 
- boxes2: [M, 4] or [batch_size, M, 4] float Tensor boxes. - Returns: - a [N, M] or [batch_size, N, M] float Tensor. - """ - y_min1, x_min1, y_max1, x_max1 = ops.split(boxes1[..., :4], 4, axis=-1) - y_min2, x_min2, y_max2, x_max2 = ops.split(boxes2[..., :4], 4, axis=-1) - boxes2_rank = len(boxes2.shape) - perm = [1, 0] if boxes2_rank == 2 else [0, 2, 1] - # [N, M] or [batch_size, N, M] - intersect_ymax = ops.minimum(y_max1, ops.transpose(y_max2, perm)) - intersect_ymin = ops.maximum(y_min1, ops.transpose(y_min2, perm)) - intersect_xmax = ops.minimum(x_max1, ops.transpose(x_max2, perm)) - intersect_xmin = ops.maximum(x_min1, ops.transpose(x_min2, perm)) - - intersect_height = intersect_ymax - intersect_ymin - intersect_width = intersect_xmax - intersect_xmin - zeros_t = ops.cast(0, intersect_height.dtype) - intersect_height = ops.maximum(zeros_t, intersect_height) - intersect_width = ops.maximum(zeros_t, intersect_width) - - return intersect_height * intersect_width - - -@keras_hub_export("keras_hub.bounding_box.compute_iou") -def compute_iou( - boxes1, - boxes2, - bounding_box_format, - use_masking=False, - mask_val=-1, - images=None, - image_shape=None, -): - """Computes a lookup table vector containing the ious for a given set boxes. - - The lookup vector is to be indexed by [`boxes1_index`,`boxes2_index`] if - boxes are unbatched and by [`batch`, `boxes1_index`,`boxes2_index`] if the - boxes are batched. - - The users can pass `boxes1` and `boxes2` to be different ranks. For example: - 1) `boxes1`: [batch_size, M, 4], `boxes2`: [batch_size, N, 4] -> return - [batch_size, M, N]. - 2) `boxes1`: [batch_size, M, 4], `boxes2`: [N, 4] -> return - [batch_size, M, N] - 3) `boxes1`: [M, 4], `boxes2`: [batch_size, N, 4] -> return - [batch_size, M, N] - 4) `boxes1`: [M, 4], `boxes2`: [N, 4] -> return [M, N] - - Args: - boxes1: a list of bounding boxes in 'corners' format. Can be batched or - unbatched. - boxes2: a list of bounding boxes in 'corners' format. Can be batched or - unbatched. - bounding_box_format: a case-insensitive string which is one of `"xyxy"`, - `"rel_xyxy"`, `"xyWH"`, `"center_xyWH"`, `"yxyx"`, `"rel_yxyx"`. - For detailed information on the supported format, see the - [KerasCV bounding box documentation](https://keras.io/api/keras_cv/bounding_box/formats/). - use_masking: whether masking will be applied. This will mask all `boxes1` - or `boxes2` that have values less than 0 in all its 4 dimensions. - Default to `False`. - mask_val: int to mask those returned IOUs if the masking is True, defaults - to -1. - - Returns: - iou_lookup_table: a vector containing the pairwise ious of boxes1 and - boxes2. - """ # noqa: E501 - - boxes1_rank = len(boxes1.shape) - boxes2_rank = len(boxes2.shape) - - if boxes1_rank not in [2, 3]: - raise ValueError( - "compute_iou() expects boxes1 to be batched, or to be unbatched. " - f"Received len(boxes1.shape)={boxes1_rank}, " - f"len(boxes2.shape)={boxes2_rank}. Expected either " - "len(boxes1.shape)=2 AND or len(boxes1.shape)=3." - ) - if boxes2_rank not in [2, 3]: - raise ValueError( - "compute_iou() expects boxes2 to be batched, or to be unbatched. " - f"Received len(boxes1.shape)={boxes1_rank}, " - f"len(boxes2.shape)={boxes2_rank}. Expected either " - "len(boxes2.shape)=2 AND or len(boxes2.shape)=3." 
- ) - - target_format = "yxyx" - if is_relative(bounding_box_format): - target_format = as_relative(target_format) - - boxes1 = convert_format( - boxes1, - source=bounding_box_format, - target=target_format, - images=images, - image_shape=image_shape, - ) - - boxes2 = convert_format( - boxes2, - source=bounding_box_format, - target=target_format, - images=images, - image_shape=image_shape, - ) - - intersect_area = _compute_intersection(boxes1, boxes2) - boxes1_area = _compute_area(boxes1) - boxes2_area = _compute_area(boxes2) - boxes2_area_rank = len(boxes2_area.shape) - boxes2_axis = 1 if (boxes2_area_rank == 2) else 0 - boxes1_area = ops.expand_dims(boxes1_area, axis=-1) - boxes2_area = ops.expand_dims(boxes2_area, axis=boxes2_axis) - union_area = boxes1_area + boxes2_area - intersect_area - res = ops.divide(intersect_area, union_area + keras.backend.epsilon()) - - if boxes1_rank == 2: - perm = [1, 0] - else: - perm = [0, 2, 1] - - if not use_masking: - return res - - mask_val_t = ops.cast(mask_val, res.dtype) * ops.ones_like(res) - boxes1_mask = ops.less(ops.max(boxes1, axis=-1, keepdims=True), 0.0) - boxes2_mask = ops.less(ops.max(boxes2, axis=-1, keepdims=True), 0.0) - background_mask = ops.logical_or( - boxes1_mask, ops.transpose(boxes2_mask, perm) - ) - iou_lookup_table = ops.where(background_mask, mask_val_t, res) - return iou_lookup_table - - -@keras_hub_export("keras_hub.bounding_box.compute_ciou") -def compute_ciou(boxes1, boxes2, bounding_box_format): - """ - Computes the Complete IoU (CIoU) between two bounding boxes or between - two batches of bounding boxes. - - CIoU loss is an extension of GIoU loss, which further improves the IoU - optimization for object detection. CIoU loss not only penalizes the - bounding box coordinates but also considers the aspect ratio and center - distance of the boxes. The length of the last dimension should be 4 to - represent the bounding boxes. - - Args: - box1 (tensor): tensor representing the first bounding box with - shape (..., 4). - box2 (tensor): tensor representing the second bounding box with - shape (..., 4). - bounding_box_format: a case-insensitive string (for example, "xyxy"). - Each bounding box is defined by these 4 values. For detailed - information on the supported formats, see the [KerasCV bounding box - documentation](https://keras.io/api/keras_cv/bounding_box/formats/). - - Returns: - tensor: The CIoU distance between the two bounding boxes. 
- """ - target_format = "xyxy" - if is_relative(bounding_box_format): - target_format = as_relative(target_format) - - boxes1 = convert_format( - boxes1, source=bounding_box_format, target=target_format - ) - - boxes2 = convert_format( - boxes2, source=bounding_box_format, target=target_format - ) - - x_min1, y_min1, x_max1, y_max1 = ops.split(boxes1[..., :4], 4, axis=-1) - x_min2, y_min2, x_max2, y_max2 = ops.split(boxes2[..., :4], 4, axis=-1) - - width_1 = x_max1 - x_min1 - height_1 = y_max1 - y_min1 + keras.backend.epsilon() - width_2 = x_max2 - x_min2 - height_2 = y_max2 - y_min2 + keras.backend.epsilon() - - intersection_area = ops.maximum( - ops.minimum(x_max1, x_max2) - ops.maximum(x_min1, x_min2), 0 - ) * ops.maximum( - ops.minimum(y_max1, y_max2) - ops.maximum(y_min1, y_min2), 0 - ) - union_area = ( - width_1 * height_1 - + width_2 * height_2 - - intersection_area - + keras.backend.epsilon() - ) - iou = ops.squeeze( - ops.divide(intersection_area, union_area + keras.backend.epsilon()), - axis=-1, - ) - - convex_width = ops.maximum(x_max1, x_max2) - ops.minimum(x_min1, x_min2) - convex_height = ops.maximum(y_max1, y_max2) - ops.minimum(y_min1, y_min2) - convex_diagonal_squared = ops.squeeze( - convex_width**2 + convex_height**2 + keras.backend.epsilon(), - axis=-1, - ) - centers_distance_squared = ops.squeeze( - ((x_min1 + x_max1) / 2 - (x_min2 + x_max2) / 2) ** 2 - + ((y_min1 + y_max1) / 2 - (y_min2 + y_max2) / 2) ** 2, - axis=-1, - ) - - v = ops.squeeze( - ops.power( - (4 / math.pi**2) - * (ops.arctan(width_2 / height_2) - ops.arctan(width_1 / height_1)), - 2, - ), - axis=-1, - ) - alpha = v / (v - iou + (1 + keras.backend.epsilon())) - - return iou - ( - centers_distance_squared / convex_diagonal_squared + v * alpha - ) diff --git a/keras_hub/src/bounding_box/iou_test.py b/keras_hub/src/bounding_box/iou_test.py deleted file mode 100644 index 2e00f24869..0000000000 --- a/keras_hub/src/bounding_box/iou_test.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Tests for iou functions.""" - -import numpy as np - -from keras_hub.src.bounding_box import iou as iou_lib -from keras_hub.src.tests.test_case import TestCase - - -class IoUTest(TestCase): - def test_compute_single_iou(self): - bb1 = np.array([[100, 101, 200, 201]]) - bb1_off_by_1 = np.array([[101, 102, 201, 202]]) - # area of bb1 and bb1_off_by_1 are each 10000. 
- # intersection area is 99*99=9801 - # iou=9801/(2*10000 - 9801)=0.96097656633 - self.assertAllClose( - iou_lib.compute_iou(bb1, bb1_off_by_1, "yxyx")[0], [0.96097656633] - ) - - def test_compute_iou(self): - bb1 = [100, 101, 200, 201] - bb1_off_by_1_pred = [101, 102, 201, 202] - iou_bb1_bb1_off = 0.96097656633 - top_left_bounding_box = [0, 2, 1, 3] - far_away_box = [1300, 1400, 1500, 1401] - another_far_away_pred = [1000, 1400, 1200, 1401] - - # Rows represent predictions, columns ground truths - expected_result = np.array( - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - dtype=np.float32, - ) - - sample_y_true = np.array([bb1, top_left_bounding_box, far_away_box]) - sample_y_pred = np.array( - [bb1_off_by_1_pred, top_left_bounding_box, another_far_away_pred], - ) - - result = iou_lib.compute_iou(sample_y_true, sample_y_pred, "yxyx") - self.assertAllClose(expected_result, result) - - def test_batched_compute_iou(self): - bb1 = [100, 101, 200, 201] - bb1_off_by_1_pred = [101, 102, 201, 202] - iou_bb1_bb1_off = 0.96097656633 - top_left_bounding_box = [0, 2, 1, 3] - far_away_box = [1300, 1400, 1500, 1401] - another_far_away_pred = [1000, 1400, 1200, 1401] - - # Rows represent predictions, columns ground truths - expected_result = np.array( - [ - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - ], - ) - - sample_y_true = np.array( - [ - [bb1, top_left_bounding_box, far_away_box], - [bb1, top_left_bounding_box, far_away_box], - ], - ) - sample_y_pred = np.array( - [ - [ - bb1_off_by_1_pred, - top_left_bounding_box, - another_far_away_pred, - ], - [ - bb1_off_by_1_pred, - top_left_bounding_box, - another_far_away_pred, - ], - ], - ) - - result = iou_lib.compute_iou(sample_y_true, sample_y_pred, "yxyx") - self.assertAllClose(expected_result, result) - - def test_batched_boxes1_unbatched_boxes2(self): - bb1 = [100, 101, 200, 201] - bb1_off_by_1_pred = [101, 102, 201, 202] - iou_bb1_bb1_off = 0.96097656633 - top_left_bounding_box = [0, 2, 1, 3] - far_away_box = [1300, 1400, 1500, 1401] - another_far_away_pred = [1000, 1400, 1200, 1401] - - # Rows represent predictions, columns ground truths - expected_result = np.array( - [ - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - ], - ) - - sample_y_true = np.array( - [ - [bb1, top_left_bounding_box, far_away_box], - [bb1, top_left_bounding_box, far_away_box], - ], - ) - sample_y_pred = np.array( - [bb1_off_by_1_pred, top_left_bounding_box, another_far_away_pred], - ) - - result = iou_lib.compute_iou(sample_y_true, sample_y_pred, "yxyx") - self.assertAllClose(expected_result, result) - - def test_unbatched_boxes1_batched_boxes2(self): - bb1 = [100, 101, 200, 201] - bb1_off_by_1_pred = [101, 102, 201, 202] - iou_bb1_bb1_off = 0.96097656633 - top_left_bounding_box = [0, 2, 1, 3] - far_away_box = [1300, 1400, 1500, 1401] - another_far_away_pred = [1000, 1400, 1200, 1401] - - # Rows represent predictions, columns ground truths - expected_result = np.array( - [ - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - ], - ) - - sample_y_true = np.array( - [ - [bb1, top_left_bounding_box, far_away_box], - ], - ) - sample_y_pred = np.array( - [ - [ - bb1_off_by_1_pred, - top_left_bounding_box, - another_far_away_pred, - ], - [ - bb1_off_by_1_pred, - top_left_bounding_box, - 
another_far_away_pred, - ], - ], - ) - - result = iou_lib.compute_iou(sample_y_true, sample_y_pred, "yxyx") - self.assertAllClose(expected_result, result) diff --git a/keras_hub/src/bounding_box/to_dense.py b/keras_hub/src/bounding_box/to_dense.py deleted file mode 100644 index 68b00d065f..0000000000 --- a/keras_hub/src/bounding_box/to_dense.py +++ /dev/null @@ -1,81 +0,0 @@ -import keras_hub.src.bounding_box.validate_format as validate_format -from keras_hub.src.api_export import keras_hub_export - -try: - import tensorflow as tf -except ImportError: - tf = None - - -def _box_shape(batched, boxes_shape, max_boxes): - # ensure we dont drop the final axis in RaggedTensor mode - if max_boxes is None: - shape = list(boxes_shape) - shape[-1] = 4 - return shape - if batched: - return [None, max_boxes, 4] - return [max_boxes, 4] - - -def _classes_shape(batched, classes_shape, max_boxes): - if max_boxes is None: - return None - if batched: - return [None, max_boxes] + classes_shape[2:] - return [max_boxes] + classes_shape[2:] - - -@keras_hub_export("keras_hub.bounding_box.to_dense") -def to_dense(bounding_boxes, max_boxes=None, default_value=-1): - """to_dense converts bounding boxes to Dense tensors - - Args: - bounding_boxes: bounding boxes in KerasCV dictionary format. - max_boxes: the maximum number of boxes, used to pad tensors to a given - shape. This can be used to make object detection pipelines TPU - compatible. - default_value: the default value to pad bounding boxes with. defaults - to -1. - """ - info = validate_format.validate_format(bounding_boxes) - - # guards against errors in metrics regarding modification of inputs. - # also guards against unexpected behavior when modifying downstream - bounding_boxes = bounding_boxes.copy() - - # Already running in masked mode - if not info["ragged"]: - # even if already ragged, still copy the dictionary for API consistency - return bounding_boxes - - if isinstance(bounding_boxes["classes"], tf.RaggedTensor): - bounding_boxes["classes"] = bounding_boxes["classes"].to_tensor( - default_value=default_value, - shape=_classes_shape( - info["is_batched"], bounding_boxes["classes"].shape, max_boxes - ), - ) - - if isinstance(bounding_boxes["boxes"], tf.RaggedTensor): - bounding_boxes["boxes"] = bounding_boxes["boxes"].to_tensor( - default_value=default_value, - shape=_box_shape( - info["is_batched"], bounding_boxes["boxes"].shape, max_boxes - ), - ) - - if "confidence" in bounding_boxes: - if isinstance(bounding_boxes["confidence"], tf.RaggedTensor): - bounding_boxes["confidence"] = bounding_boxes[ - "confidence" - ].to_tensor( - default_value=default_value, - shape=_classes_shape( - info["is_batched"], - bounding_boxes["confidence"].shape, - max_boxes, - ), - ) - - return bounding_boxes diff --git a/keras_hub/src/bounding_box/to_dense_test.py b/keras_hub/src/bounding_box/to_dense_test.py deleted file mode 100644 index 91acb8137a..0000000000 --- a/keras_hub/src/bounding_box/to_dense_test.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest -import tensorflow as tf -from keras import backend - -from keras_hub.src.bounding_box import to_dense -from keras_hub.src.tests.test_case import TestCase - - -class ToDenseTest(TestCase): - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_converts_to_dense(self): - bounding_boxes = { - "boxes": tf.ragged.constant( - [[[0, 0, 1, 1]], [[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 1, 1]]] - ), - "classes": tf.ragged.constant([[0], [1, 2, 3]]), - } - 
bounding_boxes = to_dense.to_dense(bounding_boxes) - self.assertEqual(bounding_boxes["boxes"].shape, [2, 3, 4]) - self.assertEqual(bounding_boxes["classes"].shape, [2, 3]) diff --git a/keras_hub/src/bounding_box/to_ragged.py b/keras_hub/src/bounding_box/to_ragged.py deleted file mode 100644 index f86712dd35..0000000000 --- a/keras_hub/src/bounding_box/to_ragged.py +++ /dev/null @@ -1,86 +0,0 @@ -import keras - -import keras_hub.src.bounding_box.validate_format as validate_format -from keras_hub.src.api_export import keras_hub_export - -try: - import tensorflow as tf -except ImportError: - tf = None - - -@keras_hub_export("keras_hub.bounding_box.to_ragged") -def to_ragged(bounding_boxes, sentinel=-1, dtype="float32"): - """converts a Dense padded bounding box `tf.Tensor` to a `tf.RaggedTensor`. - - Bounding boxes are ragged tensors in most use cases. Converting them to a - dense tensor makes it easier to work with Tensorflow ecosystem. - This function can be used to filter out the masked out bounding boxes by - checking for padded sentinel value of the class_id axis of the - bounding_boxes. - - Example: - ```python - bounding_boxes = { - "boxes": tf.constant([[2, 3, 4, 5], [0, 1, 2, 3]]), - "classes": tf.constant([[-1, 1]]), - } - bounding_boxes = bounding_box.to_ragged(bounding_boxes) - print(bounding_boxes) - # { - # "boxes": [[0, 1, 2, 3]], - # "classes": [[1]] - # } - ``` - - Args: - bounding_boxes: a Tensor of bounding boxes. May be batched, or - unbatched. - sentinel: The value indicating that a bounding box does not exist at the - current index, and the corresponding box is padding, defaults to -1. - dtype: the data type to use for the underlying Tensors. - Returns: - dictionary of `tf.RaggedTensor` or 'tf.Tensor' containing the filtered - bounding boxes. - """ - if keras.config.backend() != "tensorflow": - raise NotImplementedError( - "`bounding_box.to_ragged` was called using a backend which does " - "not support ragged tensors. " - f"Current backend: {keras.backend.backend()}." 
- ) - - info = validate_format.validate_format(bounding_boxes) - - if info["ragged"]: - return bounding_boxes - - boxes = bounding_boxes.get("boxes") - classes = bounding_boxes.get("classes") - confidence = bounding_boxes.get("confidence", None) - - mask = classes != sentinel - - boxes = tf.ragged.boolean_mask(boxes, mask) - classes = tf.ragged.boolean_mask(classes, mask) - if confidence is not None: - confidence = tf.ragged.boolean_mask(confidence, mask) - - if isinstance(boxes, tf.Tensor): - boxes = tf.RaggedTensor.from_tensor(boxes) - - if isinstance(classes, tf.Tensor) and len(classes.shape) > 1: - classes = tf.RaggedTensor.from_tensor(classes) - - if confidence is not None: - if isinstance(confidence, tf.Tensor) and len(confidence.shape) > 1: - confidence = tf.RaggedTensor.from_tensor(confidence) - - result = bounding_boxes.copy() - result["boxes"] = tf.cast(boxes, dtype) - result["classes"] = tf.cast(classes, dtype) - - if confidence is not None: - result["confidence"] = tf.cast(confidence, dtype) - - return result diff --git a/keras_hub/src/bounding_box/to_ragged_test.py b/keras_hub/src/bounding_box/to_ragged_test.py deleted file mode 100644 index 9b76866ddc..0000000000 --- a/keras_hub/src/bounding_box/to_ragged_test.py +++ /dev/null @@ -1,87 +0,0 @@ -import numpy as np -import pytest -from keras import backend - -from keras_hub.src.bounding_box import to_dense -from keras_hub.src.bounding_box import to_ragged -from keras_hub.src.tests.test_case import TestCase - - -class ToRaggedTest(TestCase): - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_converts_to_ragged(self): - bounding_boxes = { - "boxes": np.array( - [[[0, 0, 0, 0], [0, 0, 0, 0]], [[2, 3, 4, 5], [0, 1, 2, 3]]] - ), - "classes": np.array([[-1, -1], [-1, 1]]), - "confidence": np.array([[0.5, 0.7], [0.23, 0.12]]), - } - bounding_boxes = to_ragged.to_ragged(bounding_boxes) - - self.assertEqual(bounding_boxes["boxes"][1].shape, [1, 4]) - self.assertEqual(bounding_boxes["classes"][1].shape, [1]) - self.assertEqual( - bounding_boxes["confidence"][1].shape, - [ - 1, - ], - ) - - self.assertEqual(bounding_boxes["classes"][0].shape, [0]) - self.assertEqual(bounding_boxes["boxes"][0].shape, [0, 4]) - self.assertEqual( - bounding_boxes["confidence"][0].shape, - [ - 0, - ], - ) - - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_round_trip(self): - original = { - "boxes": np.array( - [ - [[0, 0, 0, 0], [-1, -1, -1, -1]], - [[-1, -1, -1, -1], [-1, -1, -1, -1]], - ] - ), - "classes": np.array([[1, -1], [-1, -1]]), - "confidence": np.array([[0.5, -1], [-1, -1]]), - } - bounding_boxes = to_ragged.to_ragged(original) - bounding_boxes = to_dense.to_dense(bounding_boxes, max_boxes=2) - - self.assertEqual(bounding_boxes["boxes"][1].shape, [2, 4]) - self.assertEqual(bounding_boxes["classes"][1].shape, [2]) - self.assertEqual(bounding_boxes["classes"][0].shape, [2]) - self.assertEqual(bounding_boxes["boxes"][0].shape, [2, 4]) - self.assertEqual(bounding_boxes["confidence"][0].shape, [2]) - - self.assertAllEqual(bounding_boxes["boxes"], original["boxes"]) - self.assertAllEqual(bounding_boxes["classes"], original["classes"]) - self.assertAllEqual( - bounding_boxes["confidence"], original["confidence"] - ) - - @pytest.mark.skipif( - backend.backend() == "tensorflow", - reason="Only applies to backends which don't support raggeds", - ) - def 
test_backend_without_raggeds_throws(self): - bounding_boxes = { - "boxes": np.array( - [[[0, 0, 0, 0], [0, 0, 0, 0]], [[2, 3, 4, 5], [0, 1, 2, 3]]] - ), - "classes": np.array([[-1, -1], [-1, 1]]), - "confidence": np.array([[0.5, 0.7], [0.23, 0.12]]), - } - - with self.assertRaisesRegex(NotImplementedError, "support ragged"): - to_ragged.to_ragged(bounding_boxes) diff --git a/keras_hub/src/bounding_box/utils.py b/keras_hub/src/bounding_box/utils.py deleted file mode 100644 index ac4fe8d05b..0000000000 --- a/keras_hub/src/bounding_box/utils.py +++ /dev/null @@ -1,181 +0,0 @@ -"""Utility functions for working with bounding boxes.""" - -from keras import ops - -from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.bounding_box import converters -from keras_hub.src.bounding_box.formats import XYWH - - -@keras_hub_export("keras_hub.bounding_box.is_relative") -def is_relative(bounding_box_format): - """A util to check if a bounding box format uses relative coordinates""" - if bounding_box_format.lower() not in converters.TO_XYXY_CONVERTERS: - raise ValueError( - "`is_relative()` received an unsupported format for the argument " - f"`bounding_box_format`. `bounding_box_format` should be one of " - f"{converters.TO_XYXY_CONVERTERS.keys()}. " - f"Got bounding_box_format={bounding_box_format}" - ) - - return bounding_box_format.startswith("rel") - - -@keras_hub_export("keras_hub.bounding_box.as_relative") -def as_relative(bounding_box_format): - """A util to get the relative equivalent of a provided bounding box format. - - If the specified format is already a relative format, - it will be returned unchanged. - """ - - if not is_relative(bounding_box_format): - return "rel_" + bounding_box_format - - return bounding_box_format - - -def _relative_area(boxes, bounding_box_format): - boxes = converters.convert_format( - boxes, - source=bounding_box_format, - target="rel_xywh", - ) - widths = boxes[..., XYWH.WIDTH] - heights = boxes[..., XYWH.HEIGHT] - # handle corner case where shear performs a full inversion. - return ops.where( - ops.logical_and(widths > 0, heights > 0), widths * heights, 0.0 - ) - - -@keras_hub_export("keras_hub.bounding_box.clip_to_image") -def clip_to_image( - bounding_boxes, bounding_box_format, images=None, image_shape=None -): - """clips bounding boxes to image boundaries. - - `clip_to_image()` clips bounding boxes that have coordinates out of bounds - of an image down to the boundaries of the image. This is done by converting - the bounding box to relative formats, then clipping them to the `[0, 1]` - range. Additionally, bounding boxes that end up with a zero area have their - class ID set to -1, indicating that there is no object present in them. - - Args: - bounding_boxes: bounding box tensor to clip. - bounding_box_format: the KerasCV bounding box format the bounding boxes - are in. - images: list of images to clip the bounding boxes to. - image_shape: the shape of the images to clip the bounding boxes to. 
- """ - boxes, classes = bounding_boxes["boxes"], bounding_boxes["classes"] - - boxes = converters.convert_format( - boxes, - source=bounding_box_format, - target="rel_xyxy", - images=images, - image_shape=image_shape, - ) - boxes, classes, images, squeeze = _format_inputs(boxes, classes, images) - x1, y1, x2, y2 = ops.split(boxes, 4, axis=-1) - clipped_bounding_boxes = ops.concatenate( - [ - ops.clip(x1, 0, 1), - ops.clip(y1, 0, 1), - ops.clip(x2, 0, 1), - ops.clip(y2, 0, 1), - ], - axis=-1, - ) - areas = _relative_area( - clipped_bounding_boxes, bounding_box_format="rel_xyxy" - ) - clipped_bounding_boxes = converters.convert_format( - clipped_bounding_boxes, - source="rel_xyxy", - target=bounding_box_format, - images=images, - image_shape=image_shape, - ) - clipped_bounding_boxes = ops.where( - ops.expand_dims(areas > 0.0, axis=-1), clipped_bounding_boxes, -1.0 - ) - classes = ops.where(areas > 0.0, classes, -1) - nan_indices = ops.any(ops.isnan(clipped_bounding_boxes), axis=-1) - classes = ops.where(nan_indices, -1, classes) - - # TODO update dict and return - clipped_bounding_boxes, classes = _format_outputs( - clipped_bounding_boxes, classes, squeeze - ) - - bounding_boxes.update({"boxes": clipped_bounding_boxes, "classes": classes}) - - return bounding_boxes - - -@keras_hub_export("keras_hub.bounding_box.clip_boxes") -def clip_boxes(boxes, image_shape): - """Clip boxes to the boundaries of the image shape""" - if boxes.shape[-1] != 4: - raise ValueError( - "boxes.shape[-1] is {:d}, but must be 4.".format(boxes.shape[-1]) - ) - - if isinstance(image_shape, list) or isinstance(image_shape, tuple): - height, width, _ = image_shape - max_length = ops.stack([height, width, height, width], axis=-1) - else: - image_shape = ops.cast(image_shape, dtype=boxes.dtype) - height = image_shape[0] - width = image_shape[1] - max_length = ops.stack([height, width, height, width], axis=-1) - - clipped_boxes = ops.maximum(ops.minimum(boxes, max_length), 0.0) - return clipped_boxes - - -def _format_inputs(boxes, classes, images): - boxes_rank = len(boxes.shape) - if boxes_rank > 3: - raise ValueError( - "Expected len(boxes.shape)=2, or len(boxes.shape)=3, got " - f"len(boxes.shape)={boxes_rank}" - ) - boxes_includes_batch = boxes_rank == 3 - # Determine if images needs an expand_dims() call - if images is not None: - images_rank = len(images.shape) - if images_rank > 4: - raise ValueError( - "Expected len(images.shape)=2, or len(images.shape)=3, got " - f"len(images.shape)={images_rank}" - ) - images_include_batch = images_rank == 4 - if boxes_includes_batch != images_include_batch: - raise ValueError( - "clip_to_image() expects both boxes and images to be batched, " - "or both boxes and images to be unbatched. Received " - f"len(boxes.shape)={boxes_rank}, " - f"len(images.shape)={images_rank}. Expected either " - "len(boxes.shape)=2 AND len(images.shape)=3, or " - "len(boxes.shape)=3 AND len(images.shape)=4." 
- ) - if not images_include_batch: - images = ops.expand_dims(images, axis=0) - - if not boxes_includes_batch: - return ( - ops.expand_dims(boxes, axis=0), - ops.expand_dims(classes, axis=0), - images, - True, - ) - return boxes, classes, images, False - - -def _format_outputs(boxes, classes, squeeze): - if squeeze: - return ops.squeeze(boxes, axis=0), ops.squeeze(classes, axis=0) - return boxes, classes diff --git a/keras_hub/src/bounding_box/utils_test.py b/keras_hub/src/bounding_box/utils_test.py deleted file mode 100644 index 40ad8e6e07..0000000000 --- a/keras_hub/src/bounding_box/utils_test.py +++ /dev/null @@ -1,155 +0,0 @@ -import numpy as np -from keras import ops - -from keras_hub.src.bounding_box import utils -from keras_hub.src.tests.test_case import TestCase - - -class BoundingBoxUtilTest(TestCase): - def test_clip_to_image_standard(self): - # Test xyxy format unbatched - height = 256 - width = 256 - bounding_boxes = { - "boxes": np.array([[200, 200, 400, 400], [100, 100, 300, 300]]), - "classes": np.array([0, 0]), - } - image = ops.ones(shape=(height, width, 3)) - bounding_boxes = utils.clip_to_image( - bounding_boxes, bounding_box_format="xyxy", images=image - ) - boxes = bounding_boxes["boxes"] - self.assertAllGreaterEqual(ops.convert_to_numpy(boxes), 0) - ( - x1, - y1, - x2, - y2, - ) = ops.split(boxes, 4, axis=1) - self.assertAllLessEqual( - ops.convert_to_numpy(ops.concatenate([x1, x2], axis=1)), width - ) - self.assertAllLessEqual( - ops.convert_to_numpy(ops.concatenate([y1, y2], axis=1)), height - ) - # Test relative format batched - image = ops.ones(shape=(1, height, width, 3)) - - bounding_boxes = { - "boxes": np.array([[[0.2, -1, 1.2, 0.3], [0.4, 1.5, 0.2, 0.3]]]), - "classes": np.array([[0, 0]]), - } - bounding_boxes = utils.clip_to_image( - bounding_boxes, bounding_box_format="rel_xyxy", images=image - ) - boxes = bounding_boxes["boxes"] - self.assertAllLessEqual(ops.convert_to_numpy(boxes), 1) - - def test_clip_to_image_filters_fully_out_bounding_boxes(self): - # Test xyxy format unbatched - height = 256 - width = 256 - bounding_boxes = { - "boxes": np.array([[257, 257, 400, 400], [100, 100, 300, 300]]), - "classes": np.array([0, 0]), - } - image = ops.ones(shape=(height, width, 3)) - bounding_boxes = utils.clip_to_image( - bounding_boxes, bounding_box_format="xyxy", images=image - ) - - ( - self.assertAllEqual( - bounding_boxes["boxes"], - np.array([[-1, -1, -1, -1], [100, 100, 256, 256]]), - ), - ) - self.assertAllEqual( - bounding_boxes["classes"], - np.array([-1, 0]), - ) - - def test_clip_to_image_filters_fully_out_bounding_boxes_negative_area(self): - # Test xyxy format unbatched - height = 256 - width = 256 - bounding_boxes = { - "boxes": np.array([[110, 120, 100, 100], [100, 100, 300, 300]]), - "classes": np.array([0, 0]), - } - image = ops.ones(shape=(height, width, 3)) - bounding_boxes = utils.clip_to_image( - bounding_boxes, bounding_box_format="xyxy", images=image - ) - self.assertAllEqual( - bounding_boxes["boxes"], - np.array( - [ - [ - -1, - -1, - -1, - -1, - ], - [ - 100, - 100, - 256, - 256, - ], - ] - ), - ) - self.assertAllEqual( - bounding_boxes["classes"], - np.array([-1, 0]), - ) - - def test_clip_to_image_filters_nans(self): - # Test xyxy format unbatched - height = 256 - width = 256 - bounding_boxes = { - "boxes": np.array( - [[0, float("NaN"), 100, 100], [100, 100, 300, 300]] - ), - "classes": np.array([0, 0]), - } - image = ops.ones(shape=(height, width, 3)) - bounding_boxes = utils.clip_to_image( - bounding_boxes, 
bounding_box_format="xyxy", images=image - ) - self.assertAllEqual( - bounding_boxes["boxes"], - np.array( - [ - [ - -1, - -1, - -1, - -1, - ], - [ - 100, - 100, - 256, - 256, - ], - ] - ), - ) - self.assertAllEqual( - bounding_boxes["classes"], - np.array([-1, 0]), - ) - - def test_is_relative_util(self): - self.assertTrue(utils.is_relative("rel_xyxy")) - self.assertFalse(utils.is_relative("xyxy")) - - with self.assertRaises(ValueError): - _ = utils.is_relative("bad_format") - - def test_as_relative_util(self): - self.assertEqual(utils.as_relative("yxyx"), "rel_yxyx") - self.assertEqual(utils.as_relative("rel_xywh"), "rel_xywh") diff --git a/keras_hub/src/bounding_box/validate_format.py b/keras_hub/src/bounding_box/validate_format.py deleted file mode 100644 index 8680dbb693..0000000000 --- a/keras_hub/src/bounding_box/validate_format.py +++ /dev/null @@ -1,85 +0,0 @@ -from keras_hub.src.api_export import keras_hub_export - -try: - import tensorflow as tf -except ImportError: - tf = None - - -@keras_hub_export("keras_hub.bounding_box.validate_format") -def validate_format(bounding_boxes, variable_name="bounding_boxes"): - """validates that a given set of bounding boxes complies with KerasHub - format. - - For a set of bounding boxes to be valid it must satisfy the following - conditions: - - `bounding_boxes` must be a dictionary - - contains keys `"boxes"` and `"classes"` - - each entry must have matching first two dimensions; representing the batch - axis and the number of boxes per image axis. - - either both `"boxes"` and `"classes"` are batched, or both are unbatched. - - Additionally, one of the following must be satisfied: - - `"boxes"` and `"classes"` are both Ragged - - `"boxes"` and `"classes"` are both Dense - - `"boxes"` and `"classes"` are unbatched - - Args: - bounding_boxes: dictionary of bounding boxes according to KerasCV - format. - - Raises: - ValueError if any of the above conditions are not met - """ - if not isinstance(bounding_boxes, dict): - raise ValueError( - f"Expected `{variable_name}` to be a dictionary, got " - f"`{variable_name}={bounding_boxes}`." - ) - if not all([x in bounding_boxes for x in ["boxes", "classes"]]): - raise ValueError( - f"Expected `{variable_name}` to be a dictionary containing keys " - "`'classes'` and `'boxes'`. Got " - f"`{variable_name}.keys()={bounding_boxes.keys()}`." - ) - - boxes = bounding_boxes.get("boxes") - classes = bounding_boxes.get("classes") - info = {} - - is_batched = len(boxes.shape) == 3 - info["is_batched"] = is_batched - info["ragged"] = isinstance(boxes, tf.RaggedTensor) - - if not is_batched: - if boxes.shape[:1] != classes.shape[:1]: - raise ValueError( - "Expected `boxes` and `classes` to have matching dimensions " - "on the first axis when operating in unbatched mode. Got " - f"`boxes.shape={boxes.shape}`, `classes.shape={classes.shape}`." - ) - - info["classes_one_hot"] = len(classes.shape) == 2 - # No Ragged checks needed in unbatched mode. - return info - - info["classes_one_hot"] = len(classes.shape) == 3 - - if isinstance(boxes, tf.RaggedTensor) != isinstance( - classes, tf.RaggedTensor - ): - raise ValueError( - "Either both `boxes` and `classes` " - "should be Ragged, or neither should be ragged." - f" Got `type(boxes)={type(boxes)}`, type(classes)={type(classes)}." - ) - - # Batched mode checks - if boxes.shape[:2] != classes.shape[:2]: - raise ValueError( - "Expected `boxes` and `classes` to have matching dimensions " - "on the first two axes when operating in batched mode. 
" - f"Got `boxes.shape={boxes.shape}`, `classes.shape={classes.shape}`." - ) - - return info diff --git a/keras_hub/src/bounding_box/validate_format_test.py b/keras_hub/src/bounding_box/validate_format_test.py deleted file mode 100644 index e2025e290a..0000000000 --- a/keras_hub/src/bounding_box/validate_format_test.py +++ /dev/null @@ -1,34 +0,0 @@ -import numpy as np - -from keras_hub.src.bounding_box import validate_format -from keras_hub.src.tests.test_case import TestCase - - -class ValidateTest(TestCase): - def test_raises_nondict(self): - with self.assertRaisesRegex( - ValueError, "Expected `bounding_boxes` to be a dictionary, got " - ): - validate_format.validate_format(np.ones((4, 3, 6))) - - def test_mismatch_dimensions(self): - with self.assertRaisesRegex( - ValueError, - "Expected `boxes` and `classes` to have matching dimensions", - ): - validate_format.validate_format( - {"boxes": np.ones((4, 3, 6)), "classes": np.ones((4, 6))} - ) - - def test_bad_keys(self): - with self.assertRaisesRegex(ValueError, "containing keys"): - validate_format.validate_format( - { - "box": [ - 1, - 2, - 3, - ], - "class": [1234], - } - ) diff --git a/keras_hub/src/layers/modeling/anchor_generator.py b/keras_hub/src/layers/modeling/anchor_generator.py index 5aefaaeaf7..9b81172d4a 100644 --- a/keras_hub/src/layers/modeling/anchor_generator.py +++ b/keras_hub/src/layers/modeling/anchor_generator.py @@ -21,6 +21,7 @@ class AnchorGenerator(keras.layers.Layer): detecting smaller objects. - Higher levels (e.g., P7) have lower resolution and are used for larger objects. + Args: bounding_box_format: str. The format of the bounding boxes to be generated. Expected to be a string like 'xyxy', 'xywh', etc. @@ -33,10 +34,12 @@ class AnchorGenerator(keras.layers.Layer): each level. Each number indicates the ratio of width to height. anchor_size: float. Scale of size of the base anchor relative to the feature stride 2^level. + Call arguments: inputs: An image tensor with shape `[B, H, W, C]` or `[H, W, C]`. Its shape will be used to determine anchor sizes. + Returns: Dict: A dictionary mapping feature levels (e.g., 'P3', 'P4', etc.) to anchor boxes. Each entry contains a @@ -45,9 +48,10 @@ class AnchorGenerator(keras.layers.Layer): where H and W are the height and width of the image, stride is 2^level, and num_anchors_per_location is `num_scales * len(aspect_ratios)`. 
+
     Example:
     ```python
-    anchor_generator = AnchorGenerator(
+    anchor_generator = keras_hub.layers.AnchorGenerator(
         bounding_box_format='xyxy',
         min_level=3,
         max_level=7,
diff --git a/keras_hub/src/layers/modeling/box_matcher.py b/keras_hub/src/layers/modeling/box_matcher.py
index b841e8deb5..6a6cf7149e 100644
--- a/keras_hub/src/layers/modeling/box_matcher.py
+++ b/keras_hub/src/layers/modeling/box_matcher.py
@@ -54,10 +54,16 @@ class BoxMatcher(keras.layers.Layer):
 
     Example:
     ```python
-    box_matcher = keras_cv.layers.BoxMatcher([0.3, 0.7], [-1, 0, 1])
-    iou_metric = keras_cv.bounding_box.compute_iou(anchors, boxes)
-    matched_columns, matched_match_values = box_matcher(iou_metric)
-    cls_mask = ops.less_equal(matched_match_values, 0)
+    positive_threshold = 0.5
+    negative_threshold = 0.4
+
+    matcher = keras_hub.layers.BoxMatcher(
+        thresholds=[negative_threshold, positive_threshold],
+        match_values=[-1, -2, 1],
+    )
+    match_indices, matched_values = matcher(sim_matrix)
+    positive_mask = ops.equal(matched_values, 1)
+    ignore_mask = ops.equal(matched_values, -2)
     ```
     """
 
diff --git a/keras_hub/src/layers/modeling/non_max_supression.py b/keras_hub/src/layers/modeling/non_max_supression.py
index 207891ac9e..2ff8c35d1f 100644
--- a/keras_hub/src/layers/modeling/non_max_supression.py
+++ b/keras_hub/src/layers/modeling/non_max_supression.py
@@ -29,6 +29,28 @@ class NonMaxSuppression(keras.layers.Layer):
         max_detections: the maximum detections to consider after nms is
             applied. A large number may trigger significant memory overhead,
             defaults to 100.
+
+    Example:
+    ```
+    boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4))
+    classes = np.expand_dims(
+        np.array(
+            [[0.1, 0.1, 0.4, 0.5, 0.9], [0.7, 0.5, 0.3, 0.0, 0.0]],
+            "float32",
+        ),
+        axis=-1,
+    )
+
+    nms = keras_hub.layers.NonMaxSuppression(
+        bounding_box_format="yxyx",
+        from_logits=False,
+        iou_threshold=1.0,
+        confidence_threshold=0.1,
+        max_detections=1,
+    )
+
+    nms_outputs = nms(boxes, classes)
+    ```
     """
 
     def __init__(
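For context on how the three layers promoted to the public `keras_hub.layers` namespace above compose, here is a minimal sketch; the shapes and hyperparameter values are illustrative only and are not taken from this patch:

```python
import numpy as np
import keras_hub

# A batch of 2 images; with 3 scales x 3 aspect ratios, the generator
# produces 9 anchors per feature location for pyramid levels P3-P7.
images = np.zeros((2, 512, 512, 3), dtype="float32")

anchor_generator = keras_hub.layers.AnchorGenerator(
    bounding_box_format="yxyx",
    min_level=3,
    max_level=7,
    num_scales=3,
    aspect_ratios=[0.5, 1.0, 2.0],
    anchor_size=4.0,
)

# Returns a dict keyed "P3"..."P7", each entry a [num_anchors, 4] tensor.
anchors = anchor_generator(images)
print({level: a.shape for level, a in anchors.items()})
```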
From feffff6cdf160748279e92714ebbe6412f147ade Mon Sep 17 00:00:00 2001
From: Sravana Neeli
Date: Thu, 16 Jan 2025 13:39:14 -0800
Subject: [PATCH 03/15] nit

---
 keras_hub/src/layers/modeling/non_max_supression.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_hub/src/layers/modeling/non_max_supression.py b/keras_hub/src/layers/modeling/non_max_supression.py
index 2ff8c35d1f..24981ea1f1 100644
--- a/keras_hub/src/layers/modeling/non_max_supression.py
+++ b/keras_hub/src/layers/modeling/non_max_supression.py
@@ -558,7 +558,7 @@ def mask_invalid_detections(bounding_boxes):
     if "num_detections" not in bounding_boxes:
         raise ValueError(
             "`bounding_boxes` must have key 'num_detections' "
-            "to be used with `bounding_box.mask_invalid_detections()`."
+            "to be used with `mask_invalid_detections()`."
         )

From 71c560bf78bb1a27594ec0b577170f3952f1a72e Mon Sep 17 00:00:00 2001
From: Sravana Neeli
Date: Thu, 23 Jan 2025 11:41:27 -0800
Subject: [PATCH 04/15] Import obj_det when satisfies keras version and nit

---
 .../src/layers/modeling/non_max_supression.py | 29 ++++++++++++++++---
 .../retinanet/retinanet_image_converter.py    |  9 ++----
 .../retinanet/retinanet_object_detector.py    |  5 ++++
 3 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/keras_hub/src/layers/modeling/non_max_supression.py b/keras_hub/src/layers/modeling/non_max_supression.py
index 24981ea1f1..7a663d0c92 100644
--- a/keras_hub/src/layers/modeling/non_max_supression.py
+++ b/keras_hub/src/layers/modeling/non_max_supression.py
@@ -2,9 +2,6 @@
 import keras
 from keras import ops
-from keras.src.layers.preprocessing.image_preprocessing.bounding_boxes import (
-    validation,
-)
 
 from keras_hub.src.api_export import keras_hub_export
@@ -553,7 +550,16 @@ def mask_invalid_detections(bounding_boxes):
     returned value will also return `tf.RaggedTensor` representations.
     """
     # ensure we are complying with Keras bounding box format.
-    validation.validate_bounding_boxes(bounding_boxes)
+    if (
+        not isinstance(bounding_boxes, dict)
+        or "labels" not in bounding_boxes
+        or "boxes" not in bounding_boxes
+    ):
+        raise ValueError(
+            "Expected `bounding_boxes` argument to be a "
+            "dict with keys 'boxes' and 'labels'. Received: "
+            f"bounding_boxes={bounding_boxes}"
+        )
 
     if "num_detections" not in bounding_boxes:
         raise ValueError(
@@ -563,6 +569,21 @@ def mask_invalid_detections(bounding_boxes):
     boxes = bounding_boxes.get("boxes")
     labels = bounding_boxes.get("labels")
+    if isinstance(boxes, list):
+        if not isinstance(labels, list):
+            raise ValueError(
+                "If `bounding_boxes['boxes']` is a list, then "
+                "`bounding_boxes['labels']` must also be a list. "
+                f"Received: bounding_boxes['labels']={labels}"
+            )
+        if len(boxes) != len(labels):
+            raise ValueError(
+                "If `bounding_boxes['boxes']` and "
+                "`bounding_boxes['labels']` are both lists, "
+                "they must have the same length. Received: "
+                f"len(bounding_boxes['boxes'])={len(boxes)} and "
+                f"len(bounding_boxes['labels'])={len(labels)}."
+            )
     confidence = bounding_boxes.get("confidence", None)
     num_detections = bounding_boxes.get("num_detections")
diff --git a/keras_hub/src/models/retinanet/retinanet_image_converter.py b/keras_hub/src/models/retinanet/retinanet_image_converter.py
index 279c5ef92d..c12a19c9f1 100644
--- a/keras_hub/src/models/retinanet/retinanet_image_converter.py
+++ b/keras_hub/src/models/retinanet/retinanet_image_converter.py
@@ -12,8 +12,7 @@ class RetinaNetImageConverter(ImageConverter):
 
     def __init__(
         self,
-        bounding_box_format,
-        pad_to_aspect_ratio=False,
+        bounding_box_format="yxyx",
         norm_mean=[0.485, 0.456, 0.406],
         norm_std=[0.229, 0.224, 0.225],
         **kwargs,
@@ -24,7 +23,7 @@ def __init__(
         width=self.image_size[1] if self.image_size else None,
         bounding_box_format=bounding_box_format,
         crop_to_aspect_ratio=self.crop_to_aspect_ratio,
-        pad_to_aspect_ratio=pad_to_aspect_ratio,
+        pad_to_aspect_ratio=self.pad_to_aspect_ratio,
         interpolation=self.interpolation,
         data_format=self.data_format,
         dtype=self.dtype_policy,
@@ -32,7 +31,6 @@ def __init__(
         )
 
         self.bounding_box_format = bounding_box_format
-        self.pad_to_aspect_ratio = pad_to_aspect_ratio
         self.norm_mean = norm_mean
         self.norm_std = norm_std
 
@@ -55,14 +53,13 @@ def call(self, x, y=None, sample_weight=None):
         if self.norm_std:
             x = x / self._expand_non_channel_dims(self.norm_std, x)
 
-        return x, y
+        return keras.utils.pack_x_y_sample_weight(x, y, sample_weight)
 
     def get_config(self):
         config = super().get_config()
         config.update(
             {
                 "bounding_box_format": self.bounding_box_format,
-                "pad_to_aspect_ratio": self.pad_to_aspect_ratio,
                 "norm_mean": self.norm_mean,
                 "norm_std": self.norm_std,
             }
diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector.py b/keras_hub/src/models/retinanet/retinanet_object_detector.py
index d9523a3a58..075c5f741c 100644
--- a/keras_hub/src/models/retinanet/retinanet_object_detector.py
+++ b/keras_hub/src/models/retinanet/retinanet_object_detector.py
@@ -1,5 +1,6 @@
 import keras
 from keras import ops
+from packaging import version
 
 from keras_hub.src.api_export import keras_hub_export
 from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator
@@ -14,6 +15,10 @@
     RetinaNetObjectDetectorPreprocessor,
 )
 
+# Check if Keras version is greater than or equal to 3.8.0
+if version.parse(keras.__version__) < version.parse("3.8.0"):
+    raise ImportError("Requires Keras 3.8.0 or higher.")
+
 
 @keras_hub_export("keras_hub.models.RetinaNetObjectDetector")
 class RetinaNetObjectDetector(ImageObjectDetector):
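For reference, the validation added in PATCH 04 expects the padded bounding-box dict layout used throughout this file. A minimal sketch of a valid input follows; all values are illustrative:

```python
import numpy as np

# One image with two padded detection slots, of which only the first is
# a real detection; -1 marks padding, per the conventions in this file.
bounding_boxes = {
    "boxes": np.array(
        [[[10.0, 20.0, 110.0, 120.0], [-1.0, -1.0, -1.0, -1.0]]]
    ),
    "labels": np.array([[0.0, -1.0]]),
    "confidence": np.array([[0.9, -1.0]]),  # optional key
    "num_detections": np.array([1]),
}
```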
From 9fa1e57de3bdcc939f198b5de0f479efa0e73b59 Mon Sep 17 00:00:00 2001
From: Sravana Neeli
Date: Mon, 27 Jan 2025 14:14:07 -0800
Subject: [PATCH 05/15] Correct api names and use assert for bounding box
 support

---
 keras_hub/api/layers/__init__.py              |  4 +---
 keras_hub/api/models/__init__.py              |  8 +++----
 .../src/layers/modeling/anchor_generator.py   |  4 ++++
 keras_hub/src/layers/modeling/box_matcher.py  |  4 ++++
 .../src/layers/modeling/non_max_supression.py | 21 +++++++++-------
 ..._object_detector.py => object_detector.py} | 14 +++++------
 ...sor.py => object_detector_preprocessor.py} | 24 +++++++++----------
 .../retinanet/retinanet_image_converter.py    | 16 +++++++++----
 .../retinanet/retinanet_object_detector.py    | 13 +++++-----
 .../retinanet_object_detector_preprocessor.py |  6 ++---
 keras_hub/src/utils/tensor_utils.py           |  8 +++++++
 11 files changed, 72 insertions(+), 50 deletions(-)
 rename keras_hub/src/models/{image_object_detector.py => object_detector.py} (87%)
 rename keras_hub/src/models/{image_object_detector_preprocessor.py => object_detector_preprocessor.py} (69%)

diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py
index c43a4d8b79..928becf3c0 100644
--- a/keras_hub/api/layers/__init__.py
+++ b/keras_hub/api/layers/__init__.py
@@ -12,9 +12,7 @@
 )
 from keras_hub.src.layers.modeling.f_net_encoder import FNetEncoder
 from keras_hub.src.layers.modeling.masked_lm_head import MaskedLMHead
-from keras_hub.src.layers.modeling.non_max_supression import (
-    NonMaxSuppression as NonMaxSupression,
-)
+from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression
 from keras_hub.src.layers.modeling.position_embedding import PositionEmbedding
 from keras_hub.src.layers.modeling.reversible_embedding import (
     ReversibleEmbedding,
diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py
index 7c7adbf97c..c1a4ce519d 100644
--- a/keras_hub/api/models/__init__.py
+++ b/keras_hub/api/models/__init__.py
@@ -193,10 +193,6 @@
 from keras_hub.src.models.image_classifier_preprocessor import (
     ImageClassifierPreprocessor,
 )
-from keras_hub.src.models.image_object_detector import ImageObjectDetector
-from keras_hub.src.models.image_object_detector_preprocessor import (
-    ImageObjectDetectorPreprocessor,
-)
 from keras_hub.src.models.image_segmenter import ImageSegmenter
 from keras_hub.src.models.image_segmenter_preprocessor import (
     ImageSegmenterPreprocessor,
@@ -232,6 +228,10 @@
 from keras_hub.src.models.mobilenet.mobilenet_image_classifier import (
     MobileNetImageClassifier,
 )
+from keras_hub.src.models.object_detector import ObjectDetector
+from keras_hub.src.models.object_detector_preprocessor import (
+    ObjectDetectorPreprocessor,
+)
 from keras_hub.src.models.opt.opt_backbone import OPTBackbone
 from keras_hub.src.models.opt.opt_causal_lm import OPTCausalLM
 from keras_hub.src.models.opt.opt_causal_lm_preprocessor import (
diff --git a/keras_hub/src/layers/modeling/anchor_generator.py b/keras_hub/src/layers/modeling/anchor_generator.py
index 9b81172d4a..418fe7b130 100644
--- a/keras_hub/src/layers/modeling/anchor_generator.py
+++ b/keras_hub/src/layers/modeling/anchor_generator.py
@@ -4,6 +4,7 @@
 from keras import ops
 
 from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.utils.tensor_utils import assert_bounding_box_support
 
 
 @keras_hub_export("keras_hub.layers.AnchorGenerator")
@@ -73,6 +74,9 @@ def __init__(
         anchor_size,
         **kwargs,
     ):
+        # Check that the installed Keras version supports bounding box utils
+        assert_bounding_box_support(self.__class__.__name__)
+
         super().__init__(**kwargs)
         self.bounding_box_format = bounding_box_format
         self.min_level = min_level
diff --git a/keras_hub/src/layers/modeling/box_matcher.py b/keras_hub/src/layers/modeling/box_matcher.py
index 6a6cf7149e..5f2df744f3 100644
--- a/keras_hub/src/layers/modeling/box_matcher.py
+++ b/keras_hub/src/layers/modeling/box_matcher.py
@@ -2,6 +2,7 @@
 from keras import ops
 
 from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.utils.tensor_utils import assert_bounding_box_support
 
 
 @keras_hub_export("keras_hub.layers.BoxMatcher")
@@ -75,6 +76,9 @@ def __init__(
         force_match_for_each_col=False,
         **kwargs,
     ):
+        # Check that the installed Keras version supports bounding box utils
+        assert_bounding_box_support(self.__class__.__name__)
+
         super().__init__(**kwargs)
         if sorted(thresholds) != thresholds:
             raise ValueError(f"`threshold` must be sorted, got {thresholds}")
diff --git a/keras_hub/src/layers/modeling/non_max_supression.py b/keras_hub/src/layers/modeling/non_max_supression.py
index 7a663d0c92..d58f92b86a 100644
--- a/keras_hub/src/layers/modeling/non_max_supression.py
+++ b/keras_hub/src/layers/modeling/non_max_supression.py
@@ -4,31 +4,32 @@
 from keras import ops
 
 from keras_hub.src.api_export import keras_hub_export
+from keras_hub.src.utils.tensor_utils import assert_bounding_box_support
 
 EPSILON = 1e-8
 
 
-@keras_hub_export("keras_hub.layers.NonMaxSupression")
+@keras_hub_export("keras_hub.layers.NonMaxSuppression")
 class NonMaxSuppression(keras.layers.Layer):
     """A Keras layer that decodes predictions of an object detection model.
 
     Args:
-        bounding_box_format: The format of bounding boxes of input dataset.
-            Refer:
-            for more details on supported bounding box formats.
+        bounding_box_format: str. The format of bounding boxes of the input
+            dataset. Refer to `keras.utils.bounding_boxes.convert_format` for
+            more details on supported bounding box formats.
         from_logits: boolean, True means input score is logits, False means
             confidence.
-        iou_threshold: a float value in the range [0, 1] representing the
+        iou_threshold: float. Value in the range [0, 1] representing the
             minimum IoU threshold for two boxes to be considered same for
             suppression. Defaults to 0.5.
-        confidence_threshold: a float value in the range [0, 1]. All boxes with
+        confidence_threshold: float. Value in the range [0, 1]. All boxes with
             confidence below this value will be discarded, defaults to 0.5.
-        max_detections: the maximum detections to consider after nms is applied.
-            A large number may trigger significant memory overhead,
+        max_detections: int. The maximum detections to consider after NMS is
+            applied. A large number may trigger significant memory overhead,
             defaults to 100.
 
     Example:
-    ```
+    ```python
     boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4))
     classes = np.expand_dims(
         np.array(
@@ -59,6 +60,8 @@ def __init__(
         max_detections=100,
         **kwargs,
     ):
+        # Check that the installed Keras version supports bounding box utils
+        assert_bounding_box_support(self.__class__.__name__)
         super().__init__(**kwargs)
         self.bounding_box_format = bounding_box_format
         self.from_logits = from_logits
diff --git a/keras_hub/src/models/image_object_detector.py b/keras_hub/src/models/object_detector.py
similarity index 87%
rename from keras_hub/src/models/image_object_detector.py
rename to keras_hub/src/models/object_detector.py
index f8eba20dc9..e248ced22c 100644
--- a/keras_hub/src/models/image_object_detector.py
+++ b/keras_hub/src/models/object_detector.py
@@ -4,20 +4,20 @@
 from keras_hub.src.models.task import Task
 
 
-@keras_hub_export("keras_hub.models.ImageObjectDetector")
-class ImageObjectDetector(Task):
+@keras_hub_export("keras_hub.models.ObjectDetector")
+class ObjectDetector(Task):
     """Base class for all image object detection tasks.
 
-    The `ImageObjectDetector` tasks wrap a `keras_hub.models.Backbone` and
+    The `ObjectDetector` tasks wrap a `keras_hub.models.Backbone` and
     a `keras_hub.models.Preprocessor` to create a model that can be used for
-    object detection. `ImageObjectDetector` tasks take an additional
+    object detection. `ObjectDetector` tasks take an additional
     `num_classes` argument, controlling the number of predicted output classes.
 
     To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)`
     labels where `x` is an image and `y` is a dictionary with `boxes` and
     `classes`.
- All `ImageObjectDetector` tasks include a `from_preset()` constructor which + All `ObjectDetector` tasks include a `from_preset()` constructor which can be used to load a pre-trained config and weights. """ @@ -29,9 +29,9 @@ def compile( metrics=None, **kwargs, ): - """Configures the `ImageObjectDetector` task for training. + """Configures the `ObjectDetector` task for training. - The `ImageObjectDetector` task extends the default compilation signature + The `ObjectDetector` task extends the default compilation signature of `keras.Model.compile` with defaults for `optimizer`, `loss`, and `metrics`. To override these defaults, pass any value to these arguments during compilation. diff --git a/keras_hub/src/models/image_object_detector_preprocessor.py b/keras_hub/src/models/object_detector_preprocessor.py similarity index 69% rename from keras_hub/src/models/image_object_detector_preprocessor.py rename to keras_hub/src/models/object_detector_preprocessor.py index 6ff6e2c21d..904f1c02d3 100644 --- a/keras_hub/src/models/image_object_detector_preprocessor.py +++ b/keras_hub/src/models/object_detector_preprocessor.py @@ -5,20 +5,20 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function -@keras_hub_export("keras_hub.models.ImageObjectDetectorPreprocessor") -class ImageObjectDetectorPreprocessor(Preprocessor): +@keras_hub_export("keras_hub.models.ObjectDetectorPreprocessor") +class ObjectDetectorPreprocessor(Preprocessor): """Base class for object detector preprocessing layers. - `ImageObjectDetectorPreprocessor` tasks wraps a + `ObjectDetectorPreprocessor` tasks wraps a `keras_hub.layers.Preprocessor` to create a preprocessing layer for object detection tasks. It is intended to be paired with a `keras_hub.models.ImageObjectDetector` task. - All `ImageObjectDetectorPreprocessor` take three inputs, `x`, `y`, and + All `ObjectDetectorPreprocessor` take three inputs, `x`, `y`, and `sample_weight`. `x`, the first input, should always be included. It can be a image or batch of images. See examples below. `y` and `sample_weight` - are optional inputs that will be passed through unaltered. Usually, `y` will - be the a dict of `{"boxes": Tensor(batch_size, num_boxes, 4), + are optional inputs that will be passed through unaltered. Usually, `y` + willbe the a dict of `{"boxes": Tensor(batch_size, num_boxes, 4), "classes": (batch_size, num_boxes)}. The layer will returns either `x`, an `(x, y)` tuple if labels were @@ -26,18 +26,18 @@ class ImageObjectDetectorPreprocessor(Preprocessor): were provided. `x` will be the input images after all model preprocessing has been applied. - All `ImageObjectDetectorPreprocessor` tasks include a `from_preset()` - constructor which can be used to load a pre-trained config and vocabularies. - You can call the `from_preset()` constructor directly on this base class, in - which case the correct class for your model will be automatically - instantiated. + All `ObjectDetectorPreprocessor` tasks include a `from_preset()` + constructor which can be used to load a pre-trained config and + vocabularies. You can call the `from_preset()` constructor directly on + this base class, in which case the correct class for your model will be + automatically instantiated. Args: image_converter: Preprocessing pipeline for images. Examples. 
```python - preprocessor = keras_hub.models.ImageObjectDetectorPreprocessor.from_preset( + preprocessor = keras_hub.models.ObjectDetectorPreprocessor.from_preset( "retinanet_resnet50", ) """ diff --git a/keras_hub/src/models/retinanet/retinanet_image_converter.py b/keras_hub/src/models/retinanet/retinanet_image_converter.py index c12a19c9f1..8fbec72f4b 100644 --- a/keras_hub/src/models/retinanet/retinanet_image_converter.py +++ b/keras_hub/src/models/retinanet/retinanet_image_converter.py @@ -12,15 +12,18 @@ class RetinaNetImageConverter(ImageConverter): def __init__( self, - bounding_box_format="yxyx", + image_size=None, + scale=None, + offset=None, norm_mean=[0.485, 0.456, 0.406], norm_std=[0.229, 0.224, 0.225], + bounding_box_format="yxyx", **kwargs, ): super().__init__(**kwargs) self.resizing = keras.layers.Resizing( - height=self.image_size[0] if self.image_size else None, - width=self.image_size[1] if self.image_size else None, + height=self.image_size[0] if image_size else None, + width=self.image_size[1] if image_size else None, bounding_box_format=bounding_box_format, crop_to_aspect_ratio=self.crop_to_aspect_ratio, pad_to_aspect_ratio=self.pad_to_aspect_ratio, @@ -30,9 +33,12 @@ def __init__( name="resizing", ) - self.bounding_box_format = bounding_box_format + self.image_size = image_size + self.scale = scale + self.offset = offset self.norm_mean = norm_mean self.norm_std = norm_std + self.bounding_box_format = bounding_box_format @preprocessing_function def call(self, x, y=None, sample_weight=None): @@ -59,9 +65,9 @@ def get_config(self): config = super().get_config() config.update( { - "bounding_box_format": self.bounding_box_format, "norm_mean": self.norm_mean, "norm_std": self.norm_std, + "bounding_box_format": self.bounding_box_format, } ) return config diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector.py b/keras_hub/src/models/retinanet/retinanet_object_detector.py index 075c5f741c..efc4983512 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector.py @@ -1,11 +1,10 @@ import keras from keras import ops -from packaging import version from keras_hub.src.api_export import keras_hub_export from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression -from keras_hub.src.models.image_object_detector import ImageObjectDetector +from keras_hub.src.models.object_detector import ObjectDetector from keras_hub.src.models.retinanet.prediction_head import PredictionHead from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_label_encoder import ( @@ -14,14 +13,11 @@ from keras_hub.src.models.retinanet.retinanet_object_detector_preprocessor import ( # noqa: E501 RetinaNetObjectDetectorPreprocessor, ) - -# Check if Keras version is greater than or equal to 2.10.0 -if version.parse(keras.__version__) < version.parse("3.8.0"): - raise ImportError("Requires 3.8.0 or higher.") +from keras_hub.src.utils.tensor_utils import assert_bounding_box_support @keras_hub_export("keras_hub.models.RetinaNetObjectDetector") -class RetinaNetObjectDetector(ImageObjectDetector): +class RetinaNetObjectDetector(ObjectDetector): """RetinaNet object detector model. This class implements the RetinaNet object detection architecture. 
@@ -108,6 +104,9 @@ def __init__( prediction_decoder=None, **kwargs, ): + # Check whether current version of keras support bounding box utils + assert_bounding_box_support(self.__class__.__name__) + # === Layers === image_input = keras.layers.Input(backbone.image_shape, name="images") head_dtype = dtype or backbone.dtype_policy diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py b/keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py index 8bc6d1f796..caa46be1d3 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py @@ -1,6 +1,6 @@ from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.models.image_object_detector_preprocessor import ( - ImageObjectDetectorPreprocessor, +from keras_hub.src.models.object_detector_preprocessor import ( + ObjectDetectorPreprocessor, ) from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_image_converter import ( @@ -9,6 +9,6 @@ @keras_hub_export("keras_hub.models.RetinaNetObjectDetectorPreprocessor") -class RetinaNetObjectDetectorPreprocessor(ImageObjectDetectorPreprocessor): +class RetinaNetObjectDetectorPreprocessor(ObjectDetectorPreprocessor): backbone_cls = RetinaNetBackbone image_converter_cls = RetinaNetImageConverter diff --git a/keras_hub/src/utils/tensor_utils.py b/keras_hub/src/utils/tensor_utils.py index 19eb6975d7..4ceb5b6a38 100644 --- a/keras_hub/src/utils/tensor_utils.py +++ b/keras_hub/src/utils/tensor_utils.py @@ -6,6 +6,7 @@ import keras import numpy as np from keras import ops +from packaging import version try: import tensorflow as tf @@ -262,6 +263,13 @@ def assert_tf_libs_installed(symbol_name): ) +def assert_bounding_box_support(symbol_name): + if version.parse(keras.__version__) < version.parse("3.8.0"): + raise ImportError( + f"{symbol_name} equires Keras version to be 3.8.0 or higher." 
+ ) + + def assert_tf_backend(symbol_name): if keras.config.backend() != "tensorflow": raise RuntimeError( From 26e34d878f988e97aad063fe519da1c0cc171a01 Mon Sep 17 00:00:00 2001 From: Sravana Neeli Date: Thu, 30 Jan 2025 12:00:02 -0800 Subject: [PATCH 06/15] fix resize error and skip tests version < 3.8.0 --- .../layers/modeling/anchor_generator_test.py | 11 ++++++++ .../src/layers/modeling/box_matcher_test.py | 27 +++++++++++++++++++ .../modeling/non_max_supression_test.py | 11 ++++++++ .../models/object_detector_preprocessor.py | 2 +- .../retinanet/retinanet_image_converter.py | 16 +++++------ keras_hub/src/utils/tensor_utils.py | 3 ++- 6 files changed, 60 insertions(+), 10 deletions(-) diff --git a/keras_hub/src/layers/modeling/anchor_generator_test.py b/keras_hub/src/layers/modeling/anchor_generator_test.py index f3bc2510de..6ce45dec33 100644 --- a/keras_hub/src/layers/modeling/anchor_generator_test.py +++ b/keras_hub/src/layers/modeling/anchor_generator_test.py @@ -1,12 +1,19 @@ +import keras import numpy as np +import pytest from absl.testing import parameterized from keras import ops +from packaging import version from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.tests.test_case import TestCase class AnchorGeneratorTest(TestCase): + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_layer_behaviors(self): images_shape = (8, 128, 128, 3) self.run_layer_test( @@ -47,6 +54,10 @@ def test_layer_behaviors(self): }, ), ) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_anchor_generator( self, bounding_box_format, diff --git a/keras_hub/src/layers/modeling/box_matcher_test.py b/keras_hub/src/layers/modeling/box_matcher_test.py index 5fdf39a7ac..a210bfa109 100644 --- a/keras_hub/src/layers/modeling/box_matcher_test.py +++ b/keras_hub/src/layers/modeling/box_matcher_test.py @@ -1,11 +1,18 @@ +import keras import numpy as np +import pytest from keras import ops +from packaging import version from keras_hub.src.layers.modeling.box_matcher import BoxMatcher from keras_hub.src.tests.test_case import TestCase class BoxMatcherTest(TestCase): + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_box_matcher_invalid_length(self): fg_threshold = 0.5 bg_thresh_hi = 0.2 @@ -17,6 +24,10 @@ def test_box_matcher_invalid_length(self): match_values=[-3, -2, -1], ) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_box_matcher_unsorted_thresholds(self): fg_threshold = 0.5 bg_thresh_hi = 0.2 @@ -28,6 +39,10 @@ def test_box_matcher_unsorted_thresholds(self): match_values=[-3, -2, -1, 1], ) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_box_matcher_unbatched(self): sim_matrix = np.array([[0.04, 0, 0, 0], [0, 0, 1.0, 0]]) @@ -48,6 +63,10 @@ def test_box_matcher_unbatched(self): self.assertAllEqual(match_indices, [0, 2]) self.assertAllEqual(matched_values, [-2, 1]) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def 
test_box_matcher_batched(self): sim_matrix = np.array([[[0.04, 0, 0, 0], [0, 0, 1.0, 0]]]) @@ -68,6 +87,10 @@ def test_box_matcher_batched(self): self.assertAllEqual(match_indices, [[0, 2]]) self.assertAllEqual(matched_values, [[-2, 1]]) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_box_matcher_force_match(self): sim_matrix = np.array( [[0, 0.04, 0, 0.1], [0, 0, 1.0, 0], [0.1, 0, 0, 0], [0, 0, 0, 0.6]], @@ -93,6 +116,10 @@ def test_box_matcher_force_match(self): self.assertAllEqual(match_indices, [1, 2, 0, 3]) self.assertAllEqual(matched_values, [1, 1, 1, 1]) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_box_matcher_empty_gt_boxes(self): sim_matrix = np.array([[], []]) diff --git a/keras_hub/src/layers/modeling/non_max_supression_test.py b/keras_hub/src/layers/modeling/non_max_supression_test.py index 4f310a0934..8faddd4c7a 100644 --- a/keras_hub/src/layers/modeling/non_max_supression_test.py +++ b/keras_hub/src/layers/modeling/non_max_supression_test.py @@ -1,11 +1,18 @@ +import keras import numpy as np +import pytest from keras import ops +from packaging import version from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression from keras_hub.src.tests.test_case import TestCase class NonMaxSupressionTest(TestCase): + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_confidence_threshold(self): boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4)) classes = ops.expand_dims( @@ -32,6 +39,10 @@ def test_confidence_threshold(self): self.assertAllClose(outputs["labels"], [[0.0, 0.0], [0.0, 0.0]]) self.assertAllClose(outputs["confidence"], [[0.9, 0.5], [0.7, 0.5]]) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason=("Bbox utils are not supported before keras < 3.8.0",), + ) def test_max_detections(self): boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4)) classes = ops.expand_dims( diff --git a/keras_hub/src/models/object_detector_preprocessor.py b/keras_hub/src/models/object_detector_preprocessor.py index 904f1c02d3..439ddabf01 100644 --- a/keras_hub/src/models/object_detector_preprocessor.py +++ b/keras_hub/src/models/object_detector_preprocessor.py @@ -53,5 +53,5 @@ def __init__( @preprocessing_function def call(self, x, y=None, sample_weight=None): if self.image_converter: - x, y = self.image_converter(x, y) + x, y, sample_weight = self.image_converter(x, y, sample_weight) return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) diff --git a/keras_hub/src/models/retinanet/retinanet_image_converter.py b/keras_hub/src/models/retinanet/retinanet_image_converter.py index 8fbec72f4b..fb25b42eff 100644 --- a/keras_hub/src/models/retinanet/retinanet_image_converter.py +++ b/keras_hub/src/models/retinanet/retinanet_image_converter.py @@ -20,17 +20,17 @@ def __init__( bounding_box_format="yxyx", **kwargs, ): - super().__init__(**kwargs) - self.resizing = keras.layers.Resizing( - height=self.image_size[0] if image_size else None, - width=self.image_size[1] if image_size else None, + super().__init__(image_size=image_size, **kwargs) + self.resizing_bbox = keras.layers.Resizing( + height=image_size[0] if image_size else None, + width=image_size[1] if image_size else None, 
bounding_box_format=bounding_box_format, crop_to_aspect_ratio=self.crop_to_aspect_ratio, pad_to_aspect_ratio=self.pad_to_aspect_ratio, interpolation=self.interpolation, data_format=self.data_format, dtype=self.dtype_policy, - name="resizing", + name="resizing_bbox", ) self.image_size = image_size @@ -43,11 +43,11 @@ def __init__( @preprocessing_function def call(self, x, y=None, sample_weight=None): if y is not None: - inputs = self.resizing({"images": x, "bounding_boxes": y}) + inputs = self.resizing_bbox({"images": x, "bounding_boxes": y}) x = inputs["images"] y = inputs["bounding_boxes"] else: - x = self.resizing(x) + x = self.resizing(x) # Use while prediction pipeline # Rescaling Image if self.scale is not None: x = x * self._expand_non_channel_dims(self.scale, x) @@ -59,7 +59,7 @@ def call(self, x, y=None, sample_weight=None): if self.norm_std: x = x / self._expand_non_channel_dims(self.norm_std, x) - return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) + return x, y, sample_weight def get_config(self): config = super().get_config() diff --git a/keras_hub/src/utils/tensor_utils.py b/keras_hub/src/utils/tensor_utils.py index 4ceb5b6a38..a1dad5b162 100644 --- a/keras_hub/src/utils/tensor_utils.py +++ b/keras_hub/src/utils/tensor_utils.py @@ -266,7 +266,8 @@ def assert_tf_libs_installed(symbol_name): def assert_bounding_box_support(symbol_name): if version.parse(keras.__version__) < version.parse("3.8.0"): raise ImportError( - f"{symbol_name} equires Keras version to be 3.8.0 or higher." + f"{symbol_name} requires Keras version to be 3.8.0 or higher. " + f"Current keras version: {keras.__version__}" ) From f91c6d54b3ccb90f323caecff866fd69fc3c1bed Mon Sep 17 00:00:00 2001 From: Sravana Neeli Date: Thu, 30 Jan 2025 13:30:08 -0800 Subject: [PATCH 07/15] fix tuple error for skip test cases --- .../src/layers/modeling/anchor_generator_test.py | 4 ++-- keras_hub/src/layers/modeling/box_matcher_test.py | 12 ++++++------ .../src/layers/modeling/non_max_supression_test.py | 4 ++-- keras_hub/src/utils/tensor_utils.py | 4 +++- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/keras_hub/src/layers/modeling/anchor_generator_test.py b/keras_hub/src/layers/modeling/anchor_generator_test.py index 6ce45dec33..ccb6d4a90f 100644 --- a/keras_hub/src/layers/modeling/anchor_generator_test.py +++ b/keras_hub/src/layers/modeling/anchor_generator_test.py @@ -12,7 +12,7 @@ class AnchorGeneratorTest(TestCase): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_layer_behaviors(self): images_shape = (8, 128, 128, 3) @@ -56,7 +56,7 @@ def test_layer_behaviors(self): ) @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_anchor_generator( self, diff --git a/keras_hub/src/layers/modeling/box_matcher_test.py b/keras_hub/src/layers/modeling/box_matcher_test.py index a210bfa109..68550c7ebc 100644 --- a/keras_hub/src/layers/modeling/box_matcher_test.py +++ b/keras_hub/src/layers/modeling/box_matcher_test.py @@ -11,7 +11,7 @@ class BoxMatcherTest(TestCase): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def 
test_box_matcher_invalid_length(self): fg_threshold = 0.5 @@ -26,7 +26,7 @@ def test_box_matcher_invalid_length(self): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_box_matcher_unsorted_thresholds(self): fg_threshold = 0.5 @@ -41,7 +41,7 @@ def test_box_matcher_unsorted_thresholds(self): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_box_matcher_unbatched(self): sim_matrix = np.array([[0.04, 0, 0, 0], [0, 0, 1.0, 0]]) @@ -65,7 +65,7 @@ def test_box_matcher_unbatched(self): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_box_matcher_batched(self): sim_matrix = np.array([[[0.04, 0, 0, 0], [0, 0, 1.0, 0]]]) @@ -89,7 +89,7 @@ def test_box_matcher_batched(self): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_box_matcher_force_match(self): sim_matrix = np.array( @@ -118,7 +118,7 @@ def test_box_matcher_force_match(self): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_box_matcher_empty_gt_boxes(self): sim_matrix = np.array([[], []]) diff --git a/keras_hub/src/layers/modeling/non_max_supression_test.py b/keras_hub/src/layers/modeling/non_max_supression_test.py index 8faddd4c7a..b3cd634e98 100644 --- a/keras_hub/src/layers/modeling/non_max_supression_test.py +++ b/keras_hub/src/layers/modeling/non_max_supression_test.py @@ -11,7 +11,7 @@ class NonMaxSupressionTest(TestCase): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_confidence_threshold(self): boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4)) @@ -41,7 +41,7 @@ def test_confidence_threshold(self): @pytest.mark.skipif( version.parse(keras.__version__) < version.parse("3.8.0"), - reason=("Bbox utils are not supported before keras < 3.8.0",), + reason="Bbox utils are not supported before keras < 3.8.0", ) def test_max_detections(self): boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4)) diff --git a/keras_hub/src/utils/tensor_utils.py b/keras_hub/src/utils/tensor_utils.py index a1dad5b162..614bbb4fe5 100644 --- a/keras_hub/src/utils/tensor_utils.py +++ b/keras_hub/src/utils/tensor_utils.py @@ -1,6 +1,7 @@ import contextlib import functools import inspect +import re import threading import keras @@ -264,7 +265,8 @@ def assert_tf_libs_installed(symbol_name): def assert_bounding_box_support(symbol_name): - if version.parse(keras.__version__) < version.parse("3.8.0"): + keras_version = re.sub(r"dev.*", "", keras.__version__) + if version.parse(keras_version) < version.parse("3.8.0"): raise ImportError( f"{symbol_name} requires Keras version to be 3.8.0 or higher. 
" f"Current keras version: {keras.__version__}" From 9c64c1f1264d9cf61cd601ce5e08f6c3d606ea79 Mon Sep 17 00:00:00 2001 From: Sravana Neeli Date: Thu, 30 Jan 2025 14:16:58 -0800 Subject: [PATCH 08/15] skip retinanet test cases for keras < 3.8.0 --- .../src/models/retinanet/retinanet_label_encoder_test.py | 7 +++++++ .../src/models/retinanet/retinanet_object_detector_test.py | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py b/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py index ca4f151309..db7818e449 100644 --- a/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py +++ b/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py @@ -1,5 +1,8 @@ +import keras import numpy as np +import pytest from keras import ops +from packaging import version from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.models.retinanet.retinanet_label_encoder import ( @@ -8,6 +11,10 @@ from keras_hub.src.tests.test_case import TestCase +@pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", +) class RetinaNetLabelEncoderTest(TestCase): def setUp(self): anchor_generator = AnchorGenerator( diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py index 38e917c4a7..5e01c802a5 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py @@ -1,5 +1,7 @@ +import keras import numpy as np import pytest +from packaging import version from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone @@ -19,6 +21,10 @@ from keras_hub.src.tests.test_case import TestCase +@pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", +) class RetinaNetObjectDetectorTest(TestCase): def setUp(self): resnet_kwargs = { From d11d8d307ed00610b098b5abec6f869fc6c969db Mon Sep 17 00:00:00 2001 From: Sravana Neeli Date: Thu, 30 Jan 2025 14:20:41 -0800 Subject: [PATCH 09/15] correct re string value --- keras_hub/src/utils/tensor_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras_hub/src/utils/tensor_utils.py b/keras_hub/src/utils/tensor_utils.py index 614bbb4fe5..29f9ed8414 100644 --- a/keras_hub/src/utils/tensor_utils.py +++ b/keras_hub/src/utils/tensor_utils.py @@ -265,7 +265,7 @@ def assert_tf_libs_installed(symbol_name): def assert_bounding_box_support(symbol_name): - keras_version = re.sub(r"dev.*", "", keras.__version__) + keras_version = re.sub(r".dev.*", "", keras.__version__) if version.parse(keras_version) < version.parse("3.8.0"): raise ImportError( f"{symbol_name} requires Keras version to be 3.8.0 or higher. 
" From 7609c5bcddb55489ab1831e6727d7ad913e6d685 Mon Sep 17 00:00:00 2001 From: Sravana Neeli Date: Mon, 3 Feb 2025 10:17:15 -0800 Subject: [PATCH 10/15] mark all tests at once, remove hardcoding and try with re for version parse --- .../src/layers/modeling/anchor_generator.py | 2 ++ .../layers/modeling/anchor_generator_test.py | 12 +++----- .../src/layers/modeling/box_matcher_test.py | 28 +++---------------- .../retinanet/retinanet_label_encoder.py | 21 ++++++++------ keras_hub/src/utils/tensor_utils.py | 4 +-- 5 files changed, 23 insertions(+), 44 deletions(-) diff --git a/keras_hub/src/layers/modeling/anchor_generator.py b/keras_hub/src/layers/modeling/anchor_generator.py index 418fe7b130..a37cb1229e 100644 --- a/keras_hub/src/layers/modeling/anchor_generator.py +++ b/keras_hub/src/layers/modeling/anchor_generator.py @@ -10,10 +10,12 @@ @keras_hub_export("keras_hub.layers.AnchorGenerator") class AnchorGenerator(keras.layers.Layer): """Generates anchor boxes for object detection tasks. + This layer creates a set of anchor boxes (also known as default boxes or priors) for use in object detection models, particularly those utilizing Feature Pyramid Networks (FPN). It generates anchors across multiple pyramid levels, with various scales and aspect ratios. + Feature Pyramid Levels: - Levels typically range from 2 to 6 (P2 to P7), corresponding to different resolutions of the input image. diff --git a/keras_hub/src/layers/modeling/anchor_generator_test.py b/keras_hub/src/layers/modeling/anchor_generator_test.py index ccb6d4a90f..e5918cdfda 100644 --- a/keras_hub/src/layers/modeling/anchor_generator_test.py +++ b/keras_hub/src/layers/modeling/anchor_generator_test.py @@ -9,11 +9,11 @@ from keras_hub.src.tests.test_case import TestCase +@pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", +) class AnchorGeneratorTest(TestCase): - @pytest.mark.skipif( - version.parse(keras.__version__) < version.parse("3.8.0"), - reason="Bbox utils are not supported before keras < 3.8.0", - ) def test_layer_behaviors(self): images_shape = (8, 128, 128, 3) self.run_layer_test( @@ -54,10 +54,6 @@ def test_layer_behaviors(self): }, ), ) - @pytest.mark.skipif( - version.parse(keras.__version__) < version.parse("3.8.0"), - reason="Bbox utils are not supported before keras < 3.8.0", - ) def test_anchor_generator( self, bounding_box_format, diff --git a/keras_hub/src/layers/modeling/box_matcher_test.py b/keras_hub/src/layers/modeling/box_matcher_test.py index 68550c7ebc..9af5da4b96 100644 --- a/keras_hub/src/layers/modeling/box_matcher_test.py +++ b/keras_hub/src/layers/modeling/box_matcher_test.py @@ -8,11 +8,11 @@ from keras_hub.src.tests.test_case import TestCase +@pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", +) class BoxMatcherTest(TestCase): - @pytest.mark.skipif( - version.parse(keras.__version__) < version.parse("3.8.0"), - reason="Bbox utils are not supported before keras < 3.8.0", - ) def test_box_matcher_invalid_length(self): fg_threshold = 0.5 bg_thresh_hi = 0.2 @@ -24,10 +24,6 @@ def test_box_matcher_invalid_length(self): match_values=[-3, -2, -1], ) - @pytest.mark.skipif( - version.parse(keras.__version__) < version.parse("3.8.0"), - reason="Bbox utils are not supported before keras < 3.8.0", - ) def test_box_matcher_unsorted_thresholds(self): fg_threshold = 0.5 bg_thresh_hi = 0.2 @@ -39,10 +35,6 @@ def 
test_box_matcher_unsorted_thresholds(self): match_values=[-3, -2, -1, 1], ) - @pytest.mark.skipif( - version.parse(keras.__version__) < version.parse("3.8.0"), - reason="Bbox utils are not supported before keras < 3.8.0", - ) def test_box_matcher_unbatched(self): sim_matrix = np.array([[0.04, 0, 0, 0], [0, 0, 1.0, 0]]) @@ -63,10 +55,6 @@ def test_box_matcher_unbatched(self): self.assertAllEqual(match_indices, [0, 2]) self.assertAllEqual(matched_values, [-2, 1]) - @pytest.mark.skipif( - version.parse(keras.__version__) < version.parse("3.8.0"), - reason="Bbox utils are not supported before keras < 3.8.0", - ) def test_box_matcher_batched(self): sim_matrix = np.array([[[0.04, 0, 0, 0], [0, 0, 1.0, 0]]]) @@ -87,10 +75,6 @@ def test_box_matcher_batched(self): self.assertAllEqual(match_indices, [[0, 2]]) self.assertAllEqual(matched_values, [[-2, 1]]) - @pytest.mark.skipif( - version.parse(keras.__version__) < version.parse("3.8.0"), - reason="Bbox utils are not supported before keras < 3.8.0", - ) def test_box_matcher_force_match(self): sim_matrix = np.array( [[0, 0.04, 0, 0.1], [0, 0, 1.0, 0], [0.1, 0, 0, 0], [0, 0, 0, 0.6]], @@ -116,10 +100,6 @@ def test_box_matcher_force_match(self): self.assertAllEqual(match_indices, [1, 2, 0, 3]) self.assertAllEqual(matched_values, [1, 1, 1, 1]) - @pytest.mark.skipif( - version.parse(keras.__version__) < version.parse("3.8.0"), - reason="Bbox utils are not supported before keras < 3.8.0", - ) def test_box_matcher_empty_gt_boxes(self): sim_matrix = np.array([[], []]) diff --git a/keras_hub/src/models/retinanet/retinanet_label_encoder.py b/keras_hub/src/models/retinanet/retinanet_label_encoder.py index 886d86422d..3f7b3dd4c2 100644 --- a/keras_hub/src/models/retinanet/retinanet_label_encoder.py +++ b/keras_hub/src/models/retinanet/retinanet_label_encoder.py @@ -3,7 +3,6 @@ import keras from keras import ops -# TODO: https://github.com/keras-team/keras-hub/issues/1965 from keras_hub.src.layers.modeling.box_matcher import BoxMatcher from keras_hub.src.utils import tensor_utils @@ -27,7 +26,8 @@ class RetinaNetLabelEncoder(keras.layers.Layer): anchor_generator: A `keras_hub.layers.AnchorGenerator`. bounding_box_format: str. Ground truth format of bounding boxes. encoding_format: str. The desired target encoding format for the boxes. - TODO: https://github.com/keras-team/keras-hub/issues/1907 + Refer: `keras.utils.bounding_boxes.convert_format` for supported + formats. positive_threshold: float. the threshold to set an anchor to positive match to gt box. Values above it are positive matches. Defaults to `0.5` @@ -110,7 +110,7 @@ def call(self, images, gt_boxes, gt_classes): "support unbatched inputs for the `images` argument. " f"Received `shape(images)={images_shape}`." 
            )
-        height, width, _ = images_shape[1:]
+        height, width, channels = images_shape[1:]
 
         if len(ops.shape(gt_classes)) == 2:
             gt_classes = ops.expand_dims(gt_classes, axis=-1)
@@ -119,14 +119,16 @@ def call(self, images, gt_boxes, gt_classes):
         anchor_boxes = ops.concatenate(list(anchor_boxes.values()), axis=0)
 
         box_targets, class_targets = self._encode_sample(
-            gt_boxes, gt_classes, anchor_boxes, height, width
+            gt_boxes, gt_classes, anchor_boxes, height, width, channels
         )
         box_targets = ops.reshape(
             box_targets, (-1, ops.shape(box_targets)[1], 4)
         )
         return box_targets, class_targets
 
-    def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, height, width):
+    def _encode_sample(
+        self, gt_boxes, gt_classes, anchor_boxes, height, width, channels
+    ):
         """Creates box and classification targets for a batched sample.
 
         Matches ground truth boxes to anchor boxes based on IOU.
@@ -146,8 +148,9 @@ def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, height, width):
             anchor_boxes: A Tensor with the shape `[total_anchors, 4]`
                 representing all the anchor boxes for a given input image
                 shape, where each anchor box is of the format
                 `[x, y, width, height]`.
-            height: int.
-            width: int.
+            height: int. Height of the inputs.
+            width: int. Width of the inputs.
+            channels: int. Number of channels in the inputs.
 
         Returns:
             Encoded bounding boxes in the format of `center_yxwh` and
@@ -164,7 +167,7 @@ def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, height, width):
             anchor_boxes,
             gt_boxes,
             bounding_box_format=self.bounding_box_format,
-            image_shape=(height, width, 3),
+            image_shape=(height, width, channels),
         )
 
         matched_gt_idx, matched_vals = self.box_matcher(iou_matrix)
@@ -185,7 +188,7 @@ def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, height, width):
             box_format=self.bounding_box_format,
             encoding_format=self.encoding_format,
             variance=self.box_variance,
-            image_shape=(height, width, 3),
+            image_shape=(height, width, channels),
         )
 
         matched_gt_cls_ids = tensor_utils.target_gather(
diff --git a/keras_hub/src/utils/tensor_utils.py b/keras_hub/src/utils/tensor_utils.py
index 29f9ed8414..a1dad5b162 100644
--- a/keras_hub/src/utils/tensor_utils.py
+++ b/keras_hub/src/utils/tensor_utils.py
@@ -1,7 +1,6 @@
 import contextlib
 import functools
 import inspect
-import re
 import threading
 
 import keras
@@ -265,8 +264,7 @@ def assert_tf_libs_installed(symbol_name):
 
 
 def assert_bounding_box_support(symbol_name):
-    keras_version = re.sub(r".dev.*", "", keras.__version__)
-    if version.parse(keras_version) < version.parse("3.8.0"):
+    if version.parse(keras.__version__) < version.parse("3.8.0"):
         raise ImportError(
             f"{symbol_name} requires Keras version to be 3.8.0 or higher. 
" f"Current keras version: {keras.__version__}" From b6b11915f7de66cd3bfe918481db336562d9e0e6 Mon Sep 17 00:00:00 2001 From: Sravana Neeli Date: Mon, 3 Feb 2025 10:22:01 -0800 Subject: [PATCH 11/15] remove extra resizing layer --- keras_hub/src/models/retinanet/retinanet_image_converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras_hub/src/models/retinanet/retinanet_image_converter.py b/keras_hub/src/models/retinanet/retinanet_image_converter.py index fb25b42eff..927a8ad6d5 100644 --- a/keras_hub/src/models/retinanet/retinanet_image_converter.py +++ b/keras_hub/src/models/retinanet/retinanet_image_converter.py @@ -20,8 +20,8 @@ def __init__( bounding_box_format="yxyx", **kwargs, ): - super().__init__(image_size=image_size, **kwargs) - self.resizing_bbox = keras.layers.Resizing( + super().__init__(**kwargs) + self.resizing = keras.layers.Resizing( height=image_size[0] if image_size else None, width=image_size[1] if image_size else None, bounding_box_format=bounding_box_format, @@ -43,7 +43,7 @@ def __init__( @preprocessing_function def call(self, x, y=None, sample_weight=None): if y is not None: - inputs = self.resizing_bbox({"images": x, "bounding_boxes": y}) + inputs = self.resizing({"images": x, "bounding_boxes": y}) x = inputs["images"] y = inputs["bounding_boxes"] else: From 19b0f7a5688ad5684304a1291dd023db471dde0b Mon Sep 17 00:00:00 2001 From: Sravana Neeli Date: Mon, 3 Feb 2025 13:20:17 -0800 Subject: [PATCH 12/15] add previous alias for backward compatibility --- keras_hub/src/models/object_detector.py | 7 ++++++- keras_hub/src/models/object_detector_preprocessor.py | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/models/object_detector.py b/keras_hub/src/models/object_detector.py index e248ced22c..d9efef7e32 100644 --- a/keras_hub/src/models/object_detector.py +++ b/keras_hub/src/models/object_detector.py @@ -4,7 +4,12 @@ from keras_hub.src.models.task import Task -@keras_hub_export("keras_hub.models.ObjectDetector") +@keras_hub_export( + [ + "keras_hub.models.ObjectDetector", + "keras_hub.models.ImageObjectDetector", + ] +) class ObjectDetector(Task): """Base class for all image object detection tasks. diff --git a/keras_hub/src/models/object_detector_preprocessor.py b/keras_hub/src/models/object_detector_preprocessor.py index 439ddabf01..a342f02512 100644 --- a/keras_hub/src/models/object_detector_preprocessor.py +++ b/keras_hub/src/models/object_detector_preprocessor.py @@ -5,7 +5,12 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function -@keras_hub_export("keras_hub.models.ObjectDetectorPreprocessor") +@keras_hub_export( + [ + "keras_hub.models.ObjectDetectorPreprocessor", + "keras_hub.models.ImageObjectDetectorPreprocessor", + ] +) class ObjectDetectorPreprocessor(Preprocessor): """Base class for object detector preprocessing layers. 
From cf9f7475b53ed9070841cb7d3ac80f73a36c1917 Mon Sep 17 00:00:00 2001
From: Sravana Neeli
Date: Mon, 3 Feb 2025 13:22:47 -0800
Subject: [PATCH 13/15] api changes

---
 keras_hub/api/models/__init__.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py
index c1a4ce519d..248232312d 100644
--- a/keras_hub/api/models/__init__.py
+++ b/keras_hub/api/models/__init__.py
@@ -229,9 +229,15 @@
     MobileNetImageClassifier,
 )
 from keras_hub.src.models.object_detector import ObjectDetector
+from keras_hub.src.models.object_detector import (
+    ObjectDetector as ImageObjectDetector,
+)
 from keras_hub.src.models.object_detector_preprocessor import (
     ObjectDetectorPreprocessor,
 )
+from keras_hub.src.models.object_detector_preprocessor import (
+    ObjectDetectorPreprocessor as ImageObjectDetectorPreprocessor,
+)
 from keras_hub.src.models.opt.opt_backbone import OPTBackbone
 from keras_hub.src.models.opt.opt_causal_lm import OPTCausalLM
 from keras_hub.src.models.opt.opt_causal_lm_preprocessor import (

From fe0eb88d0933bc51230d44656214426a2917824e Mon Sep 17 00:00:00 2001
From: Matt Watson
Date: Wed, 5 Feb 2025 16:21:48 -0800
Subject: [PATCH 14/15] Simpler bounding box preprocessing

---
 .../layers/preprocessing/image_converter.py   | 24 ++++++-
 .../preprocessing/image_converter_test.py     | 18 ++---
 .../models/object_detector_preprocessor.py    | 17 ++++-
 .../retinanet/retinanet_image_converter.py    | 68 +++----------------
 .../retinanet/retinanet_object_detector.py    |  4 +-
 5 files changed, 53 insertions(+), 78 deletions(-)

diff --git a/keras_hub/src/layers/preprocessing/image_converter.py b/keras_hub/src/layers/preprocessing/image_converter.py
index 2edeffb666..d108a26fb1 100644
--- a/keras_hub/src/layers/preprocessing/image_converter.py
+++ b/keras_hub/src/layers/preprocessing/image_converter.py
@@ -64,6 +64,12 @@ class ImageConverter(PreprocessingLayer):
         interpolation: String, the interpolation method. Supports `"bilinear"`,
             `"nearest"`, `"bicubic"`, `"lanczos3"`, `"lanczos5"`. Defaults to
             `"bilinear"`.
+        bounding_box_format: A string specifying the format of the bounding
+            boxes, one of `"xyxy"`, `"rel_xyxy"`, `"xywh"`, `"center_xywh"`,
+            `"yxyx"`, `"rel_yxyx"`. Specifies the format of the bounding boxes
+            which will be resized to `image_size` along with the image. To pass
+            bounding boxes to this layer, pass a dict with keys `"images"` and
+            `"bounding_boxes"` when calling the layer.
         data_format: String, either `"channels_last"` or `"channels_first"`.
             The ordering of the dimensions in the inputs. 
`"channels_last"` corresponds to inputs with shape `(batch, height, width, channels)` @@ -100,6 +106,7 @@ def __init__( crop_to_aspect_ratio=True, pad_to_aspect_ratio=False, interpolation="bilinear", + bounding_box_format="yxyx", data_format=None, **kwargs, ): @@ -128,6 +135,7 @@ def __init__( pad_to_aspect_ratio=pad_to_aspect_ratio, interpolation=interpolation, data_format=data_format, + bounding_box_format=bounding_box_format, dtype=self.dtype_policy, name="resizing", ) @@ -136,6 +144,7 @@ def __init__( self.crop_to_aspect_ratio = crop_to_aspect_ratio self.pad_to_aspect_ratio = pad_to_aspect_ratio self.interpolation = interpolation + self.bounding_box_format = bounding_box_format self.data_format = standardize_data_format(data_format) @property @@ -154,14 +163,22 @@ def image_size(self, value): @preprocessing_function def call(self, inputs): - x = inputs if self.image_size is not None: - x = self.resizing(x) + inputs = self.resizing(inputs) + # Allow dictionary input for handling bounding boxes. + if isinstance(inputs, dict): + x = inputs["images"] + else: + x = inputs if self.scale is not None: x = x * self._expand_non_channel_dims(self.scale, x) if self.offset is not None: x = x + self._expand_non_channel_dims(self.offset, x) - return x + if isinstance(inputs, dict): + inputs["images"] = x + else: + inputs = x + return inputs def _expand_non_channel_dims(self, value, inputs): unbatched = len(ops.shape(inputs)) == 3 @@ -192,6 +209,7 @@ def get_config(self): "interpolation": self.interpolation, "crop_to_aspect_ratio": self.crop_to_aspect_ratio, "pad_to_aspect_ratio": self.pad_to_aspect_ratio, + "bounding_box_format": self.bounding_box_format, } ) return config diff --git a/keras_hub/src/layers/preprocessing/image_converter_test.py b/keras_hub/src/layers/preprocessing/image_converter_test.py index 1fdc97e031..fccc050d42 100644 --- a/keras_hub/src/layers/preprocessing/image_converter_test.py +++ b/keras_hub/src/layers/preprocessing/image_converter_test.py @@ -8,10 +8,10 @@ from keras import ops from keras_hub.src.layers.preprocessing.image_converter import ImageConverter -from keras_hub.src.models.pali_gemma.pali_gemma_image_converter import ( - PaliGemmaImageConverter, -) from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone +from keras_hub.src.models.resnet.resnet_image_converter import ( + ResNetImageConverter, +) from keras_hub.src.tests.test_case import TestCase @@ -80,17 +80,17 @@ def test_config(self): self.assertAllClose(converter(test_batch), clone(test_batch)) def test_preset_accessors(self): - pali_gemma_presets = set(PaliGemmaImageConverter.presets.keys()) + resnet_presets = set(ResNetImageConverter.presets.keys()) all_presets = set(ImageConverter.presets.keys()) - self.assertContainsSubset(pali_gemma_presets, all_presets) - self.assertIn("pali_gemma_3b_mix_224", pali_gemma_presets) - self.assertIn("pali_gemma_3b_mix_224", all_presets) + self.assertContainsSubset(resnet_presets, all_presets) + self.assertIn("resnet_50_imagenet", resnet_presets) + self.assertIn("resnet_50_imagenet", all_presets) @pytest.mark.large def test_from_preset(self): self.assertIsInstance( - ImageConverter.from_preset("pali_gemma_3b_mix_224"), - PaliGemmaImageConverter, + ImageConverter.from_preset("resnet_50_imagenet"), + ResNetImageConverter, ) @pytest.mark.large diff --git a/keras_hub/src/models/object_detector_preprocessor.py b/keras_hub/src/models/object_detector_preprocessor.py index a342f02512..493201c419 100644 --- a/keras_hub/src/models/object_detector_preprocessor.py +++ 
b/keras_hub/src/models/object_detector_preprocessor.py @@ -23,7 +23,7 @@ class ObjectDetectorPreprocessor(Preprocessor): `sample_weight`. `x`, the first input, should always be included. It can be a image or batch of images. See examples below. `y` and `sample_weight` are optional inputs that will be passed through unaltered. Usually, `y` - willbe the a dict of `{"boxes": Tensor(batch_size, num_boxes, 4), + will be the a dict of `{"boxes": Tensor(batch_size, num_boxes, 4), "classes": (batch_size, num_boxes)}. The layer will returns either `x`, an `(x, y)` tuple if labels were @@ -57,6 +57,17 @@ def __init__( @preprocessing_function def call(self, x, y=None, sample_weight=None): - if self.image_converter: - x, y, sample_weight = self.image_converter(x, y, sample_weight) + if y is None: + x = self.image_converter(x) + else: + # Pass bounding boxes through image converter in the dictionary + # with keys format standardized by core Keras. + output = self.image_converter( + { + "images": x, + "bounding_boxes": y, + } + ) + x = output["images"] + y = output["bounding_boxes"] return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) diff --git a/keras_hub/src/models/retinanet/retinanet_image_converter.py b/keras_hub/src/models/retinanet/retinanet_image_converter.py index 927a8ad6d5..c32ea41cad 100644 --- a/keras_hub/src/models/retinanet/retinanet_image_converter.py +++ b/keras_hub/src/models/retinanet/retinanet_image_converter.py @@ -1,9 +1,6 @@ -import keras - from keras_hub.src.api_export import keras_hub_export from keras_hub.src.layers.preprocessing.image_converter import ImageConverter from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone -from keras_hub.src.utils.tensor_utils import preprocessing_function @keras_hub_export("keras_hub.layers.RetinaNetImageConverter") @@ -12,62 +9,13 @@ class RetinaNetImageConverter(ImageConverter): def __init__( self, - image_size=None, - scale=None, - offset=None, - norm_mean=[0.485, 0.456, 0.406], - norm_std=[0.229, 0.224, 0.225], - bounding_box_format="yxyx", + *args, **kwargs, ): - super().__init__(**kwargs) - self.resizing = keras.layers.Resizing( - height=image_size[0] if image_size else None, - width=image_size[1] if image_size else None, - bounding_box_format=bounding_box_format, - crop_to_aspect_ratio=self.crop_to_aspect_ratio, - pad_to_aspect_ratio=self.pad_to_aspect_ratio, - interpolation=self.interpolation, - data_format=self.data_format, - dtype=self.dtype_policy, - name="resizing_bbox", - ) - - self.image_size = image_size - self.scale = scale - self.offset = offset - self.norm_mean = norm_mean - self.norm_std = norm_std - self.bounding_box_format = bounding_box_format - - @preprocessing_function - def call(self, x, y=None, sample_weight=None): - if y is not None: - inputs = self.resizing({"images": x, "bounding_boxes": y}) - x = inputs["images"] - y = inputs["bounding_boxes"] - else: - x = self.resizing(x) # Use while prediction pipeline - # Rescaling Image - if self.scale is not None: - x = x * self._expand_non_channel_dims(self.scale, x) - if self.offset is not None: - x = x + self._expand_non_channel_dims(self.offset, x) - # By default normalize using imagenet mean and std - if self.norm_mean: - x = x - self._expand_non_channel_dims(self.norm_mean, x) - if self.norm_std: - x = x / self._expand_non_channel_dims(self.norm_std, x) - - return x, y, sample_weight - - def get_config(self): - config = super().get_config() - config.update( - { - "norm_mean": self.norm_mean, - "norm_std": self.norm_std, - 
"bounding_box_format": self.bounding_box_format, - } - ) - return config + # TODO: update presets and remove these old config options. They were + # never needed. + if "norm_mean" in kwargs: + kwargs["offset"] = [-x for x in kwargs.pop("norm_mean")] + if "norm_std" in kwargs: + kwargs["scale"] = [1.0 / x for x in kwargs.pop("norm_std")] + super().__init__(*args, **kwargs) diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector.py b/keras_hub/src/models/retinanet/retinanet_object_detector.py index efc4983512..4d05c0cc82 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector.py @@ -44,9 +44,7 @@ class RetinaNetObjectDetector(ObjectDetector): arguments. num_classes: int. The number of object classes to be detected. bounding_box_format: str. Dataset bounding box format (e.g., "xyxy", - "yxyx"). The supported formats are - refer TODO: https://github.com/keras-team/keras-hub/issues/1907. - Defaults to `yxyx`. + "yxyx"). Defaults to `yxyx`. label_encoder: Optional. A `RetinaNetLabelEncoder` instance. Encodes ground truth boxes and classes into training targets. It matches ground truth boxes to anchors based on IoU and encodes box From da6ca8eeb5ae5a3421d080518905055bd38a8005 Mon Sep 17 00:00:00 2001 From: Matt Watson Date: Wed, 5 Feb 2025 17:47:56 -0800 Subject: [PATCH 15/15] Fix bounding box check for old keras --- keras_hub/src/layers/preprocessing/image_converter.py | 6 +++++- keras_hub/src/utils/tensor_utils.py | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/keras_hub/src/layers/preprocessing/image_converter.py b/keras_hub/src/layers/preprocessing/image_converter.py index d108a26fb1..ed5202adba 100644 --- a/keras_hub/src/layers/preprocessing/image_converter.py +++ b/keras_hub/src/layers/preprocessing/image_converter.py @@ -14,6 +14,7 @@ from keras_hub.src.utils.preset_utils import get_preset_loader from keras_hub.src.utils.preset_utils import get_preset_saver from keras_hub.src.utils.python_utils import classproperty +from keras_hub.src.utils.tensor_utils import check_bounding_box_support from keras_hub.src.utils.tensor_utils import preprocessing_function @@ -128,6 +129,9 @@ def __init__( # Create the `Resizing` layer here even if it's not being used. That # allows us to make `image_size` a settable property. + resizing_kwargs = {} + if check_bounding_box_support(): + resizing_kwargs["bounding_box_format"] = bounding_box_format self.resizing = keras.layers.Resizing( height=image_size[0] if image_size else None, width=image_size[1] if image_size else None, @@ -135,9 +139,9 @@ def __init__( pad_to_aspect_ratio=pad_to_aspect_ratio, interpolation=interpolation, data_format=data_format, - bounding_box_format=bounding_box_format, dtype=self.dtype_policy, name="resizing", + **resizing_kwargs, ) self.scale = scale self.offset = offset diff --git a/keras_hub/src/utils/tensor_utils.py b/keras_hub/src/utils/tensor_utils.py index a1dad5b162..9e2a01e9da 100644 --- a/keras_hub/src/utils/tensor_utils.py +++ b/keras_hub/src/utils/tensor_utils.py @@ -263,8 +263,12 @@ def assert_tf_libs_installed(symbol_name): ) +def check_bounding_box_support(): + return version.parse(keras.__version__) >= version.parse("3.8.0") + + def assert_bounding_box_support(symbol_name): - if version.parse(keras.__version__) < version.parse("3.8.0"): + if not check_bounding_box_support(): raise ImportError( f"{symbol_name} requires Keras version to be 3.8.0 or higher. " f"Current keras version: {keras.__version__}"