diff --git a/keras_hub/api/__init__.py b/keras_hub/api/__init__.py index 7e44f01381..fa8636ab70 100644 --- a/keras_hub/api/__init__.py +++ b/keras_hub/api/__init__.py @@ -4,7 +4,6 @@ since your modifications would be overwritten. """ -from keras_hub.api import bounding_box from keras_hub.api import layers from keras_hub.api import metrics from keras_hub.api import models diff --git a/keras_hub/api/bounding_box/__init__.py b/keras_hub/api/bounding_box/__init__.py deleted file mode 100644 index dfdea4305c..0000000000 --- a/keras_hub/api/bounding_box/__init__.py +++ /dev/null @@ -1,23 +0,0 @@ -"""DO NOT EDIT. - -This file was autogenerated. Do not edit it by hand, -since your modifications would be overwritten. -""" - -from keras_hub.src.bounding_box.converters import convert_format -from keras_hub.src.bounding_box.formats import CENTER_XYWH -from keras_hub.src.bounding_box.formats import REL_XYWH -from keras_hub.src.bounding_box.formats import REL_XYXY -from keras_hub.src.bounding_box.formats import REL_YXYX -from keras_hub.src.bounding_box.formats import XYWH -from keras_hub.src.bounding_box.formats import XYXY -from keras_hub.src.bounding_box.formats import YXYX -from keras_hub.src.bounding_box.iou import compute_ciou -from keras_hub.src.bounding_box.iou import compute_iou -from keras_hub.src.bounding_box.to_dense import to_dense -from keras_hub.src.bounding_box.to_ragged import to_ragged -from keras_hub.src.bounding_box.utils import as_relative -from keras_hub.src.bounding_box.utils import clip_boxes -from keras_hub.src.bounding_box.utils import clip_to_image -from keras_hub.src.bounding_box.utils import is_relative -from keras_hub.src.bounding_box.validate_format import validate_format diff --git a/keras_hub/api/layers/__init__.py b/keras_hub/api/layers/__init__.py index 0e0d31d3a7..928becf3c0 100644 --- a/keras_hub/api/layers/__init__.py +++ b/keras_hub/api/layers/__init__.py @@ -5,11 +5,14 @@ """ from keras_hub.src.layers.modeling.alibi_bias import AlibiBias +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator +from keras_hub.src.layers.modeling.box_matcher import BoxMatcher from keras_hub.src.layers.modeling.cached_multi_head_attention import ( CachedMultiHeadAttention, ) from keras_hub.src.layers.modeling.f_net_encoder import FNetEncoder from keras_hub.src.layers.modeling.masked_lm_head import MaskedLMHead +from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression from keras_hub.src.layers.modeling.position_embedding import PositionEmbedding from keras_hub.src.layers.modeling.reversible_embedding import ( ReversibleEmbedding, @@ -55,7 +58,6 @@ from keras_hub.src.models.resnet.resnet_image_converter import ( ResNetImageConverter, ) -from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator from keras_hub.src.models.retinanet.retinanet_image_converter import ( RetinaNetImageConverter, ) diff --git a/keras_hub/api/models/__init__.py b/keras_hub/api/models/__init__.py index 7c7adbf97c..c1a4ce519d 100644 --- a/keras_hub/api/models/__init__.py +++ b/keras_hub/api/models/__init__.py @@ -193,10 +193,6 @@ from keras_hub.src.models.image_classifier_preprocessor import ( ImageClassifierPreprocessor, ) -from keras_hub.src.models.image_object_detector import ImageObjectDetector -from keras_hub.src.models.image_object_detector_preprocessor import ( - ImageObjectDetectorPreprocessor, -) from keras_hub.src.models.image_segmenter import ImageSegmenter from keras_hub.src.models.image_segmenter_preprocessor import ( 
ImageSegmenterPreprocessor, @@ -232,6 +228,10 @@ from keras_hub.src.models.mobilenet.mobilenet_image_classifier import ( MobileNetImageClassifier, ) +from keras_hub.src.models.object_detector import ObjectDetector +from keras_hub.src.models.object_detector_preprocessor import ( + ObjectDetectorPreprocessor, +) from keras_hub.src.models.opt.opt_backbone import OPTBackbone from keras_hub.src.models.opt.opt_causal_lm import OPTCausalLM from keras_hub.src.models.opt.opt_causal_lm_preprocessor import ( diff --git a/keras_hub/src/bounding_box/__init__.py b/keras_hub/src/bounding_box/__init__.py deleted file mode 100644 index 78f451fd0d..0000000000 --- a/keras_hub/src/bounding_box/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# TODO: Once all bounding boxes are moved to keras repostory remove the -# bounding box folder. diff --git a/keras_hub/src/bounding_box/converters.py b/keras_hub/src/bounding_box/converters.py deleted file mode 100644 index 7c347a9815..0000000000 --- a/keras_hub/src/bounding_box/converters.py +++ /dev/null @@ -1,606 +0,0 @@ -"""Converter functions for working with bounding box formats.""" - -import keras -from keras import ops - -from keras_hub.src.api_export import keras_hub_export - -try: - import tensorflow as tf -except ImportError: - tf = None - - -# Internal exception to propagate the fact images was not passed to a converter -# that needs it. -class RequiresImagesException(Exception): - pass - - -ALL_AXES = 4 - - -def encode_box_to_deltas( - anchors, - boxes, - anchor_format, - box_format, - encoding_format="center_yxhw", - variance=None, - image_shape=None, -): - """Encodes bounding boxes relative to anchors as deltas. - - This function calculates the deltas that represent the difference between - bounding boxes and provided anchors. Deltas encode the offsets and scaling - factors to apply to anchors to obtain the target boxes. - - Boxes and anchors are first converted to the specified `encoding_format` - (defaulting to `center_yxhw`) for consistent delta representation. - - Args: - anchors: `Tensors`. Anchor boxes with shape of `(N, 4)` where N is the - number of anchors. - boxes: `Tensors` Bounding boxes to encode. Boxes can be of shape - `(B, N, 4)` or `(N, 4)`. - anchor_format: str. The format of the input `anchors` - (e.g., "xyxy", "xywh", etc.). - box_format: str. The format of the input `boxes` - (e.g., "xyxy", "xywh", etc.). - encoding_format: str. The intermediate format to which boxes and anchors - are converted before delta calculation. Defaults to "center_yxhw". - variance: `List[float]`. A 4-element array/tensor representing variance - factors to scale the box deltas. If provided, the calculated deltas - are divided by the variance. Defaults to None. - image_shape: `Tuple[int]`. The shape of the image (height, width, 3). - When using relative bounding box format for `box_format` the - `image_shape` is used for normalization. - Returns: - Encoded box deltas. The return type matches the `encode_format`. - - Raises: - ValueError: If `variance` is not None and its length is not 4. - ValueError: If `encoding_format` is not `"center_xywh"` or - `"center_yxhw"`. 
- - """ - if variance is not None: - variance = ops.convert_to_tensor(variance, "float32") - var_len = variance.shape[-1] - - if var_len != 4: - raise ValueError(f"`variance` must be length 4, got {variance}") - - if encoding_format not in ["center_xywh", "center_yxhw"]: - raise ValueError( - "`encoding_format` should be one of 'center_xywh' or " - f"'center_yxhw', got {encoding_format}" - ) - - encoded_anchors = convert_format( - anchors, - source=anchor_format, - target=encoding_format, - image_shape=image_shape, - ) - boxes = convert_format( - boxes, - source=box_format, - target=encoding_format, - image_shape=image_shape, - ) - anchor_dimensions = ops.maximum( - encoded_anchors[..., 2:], keras.backend.epsilon() - ) - box_dimensions = ops.maximum(boxes[..., 2:], keras.backend.epsilon()) - # anchors be unbatched, boxes can either be batched or unbatched. - boxes_delta = ops.concatenate( - [ - (boxes[..., :2] - encoded_anchors[..., :2]) / anchor_dimensions, - ops.log(box_dimensions / anchor_dimensions), - ], - axis=-1, - ) - if variance is not None: - boxes_delta /= variance - return boxes_delta - - -def decode_deltas_to_boxes( - anchors, - boxes_delta, - anchor_format, - box_format, - encoded_format="center_yxhw", - variance=None, - image_shape=None, -): - """Converts bounding boxes from delta format to the specified `box_format`. - - This function decodes bounding box deltas relative to anchors to obtain the - final bounding box coordinates. The boxes are encoded in a specific - `encoded_format` (center_yxhw by default) during the decoding process. - This allows flexibility in how the deltas are applied to the anchors. - - Args: - anchors: Can be `Tensors` or `Dict[Tensors]` where keys are level - indices and values are corresponding anchor boxes. - The shape of the array/tensor should be `(N, 4)` where N is the - number of anchors. - boxes_delta Can be `Tensors` or `Dict[Tensors]` Bounding box deltas - must have the same type and structure as `anchors`. The - shape of the array/tensor can be `(N, 4)` or `(B, N, 4)` where N is - the number of boxes. - anchor_format: str. The format of the input `anchors`. - (e.g., `"xyxy"`, `"xywh"`, etc.) - box_format: str. The desired format for the output boxes. - (e.g., `"xyxy"`, `"xywh"`, etc.) - encoded_format: str. Raw output format from regression head. Defaults - to `"center_yxhw"`. - variance: `List[floats]`. A 4-element array/tensor representing - variance factors to scale the box deltas. If provided, the deltas - are multiplied by the variance before being applied to the anchors. - Defaults to None. - image_shape: The shape of the image (height, width). This is needed - if normalization to image size is required when converting between - formats. Defaults to None. - - Returns: - Decoded box coordinates. The return type matches the `box_format`. - - Raises: - ValueError: If `variance` is not None and its length is not 4. - ValueError: If `encoded_format` is not `"center_xywh"` or - `"center_yxhw"`. - - """ - if variance is not None: - variance = ops.convert_to_tensor(variance, "float32") - var_len = variance.shape[-1] - - if var_len != 4: - raise ValueError(f"`variance` must be length 4, got {variance}") - - if encoded_format not in ["center_xywh", "center_yxhw"]: - raise ValueError( - f"`encoded_format` should be 'center_xywh' or 'center_yxhw', " - f"but got '{encoded_format}'." 
- ) - - def decode_single_level(anchor, box_delta): - encoded_anchor = convert_format( - anchor, - source=anchor_format, - target=encoded_format, - image_shape=image_shape, - ) - if variance is not None: - box_delta = box_delta * variance - # anchors be unbatched, boxes can either be batched or unbatched. - box = ops.concatenate( - [ - box_delta[..., :2] * encoded_anchor[..., 2:] - + encoded_anchor[..., :2], - ops.exp(box_delta[..., 2:]) * encoded_anchor[..., 2:], - ], - axis=-1, - ) - box = convert_format( - box, - source=encoded_format, - target=box_format, - image_shape=image_shape, - ) - return box - - if isinstance(anchors, dict) and isinstance(boxes_delta, dict): - boxes = {} - for lvl, anchor in anchors.items(): - boxes[lvl] = decode_single_level(anchor, boxes_delta[lvl]) - return boxes - else: - return decode_single_level(anchors, boxes_delta) - - -def _center_yxhw_to_xyxy(boxes, images=None, image_shape=None): - y, x, height, width = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [x - width / 2.0, y - height / 2.0, x + width / 2.0, y + height / 2.0], - axis=-1, - ) - - -def _center_xywh_to_xyxy(boxes, images=None, image_shape=None): - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [x - width / 2.0, y - height / 2.0, x + width / 2.0, y + height / 2.0], - axis=-1, - ) - - -def _xywh_to_xyxy(boxes, images=None, image_shape=None): - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([x, y, x + width, y + height], axis=-1) - - -def _xyxy_to_center_yxhw(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - (top + bottom) / 2.0, - (left + right) / 2.0, - bottom - top, - right - left, - ], - axis=-1, - ) - - -def _rel_xywh_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - x, y, width, height = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - image_width * x, - image_height * y, - image_width * (x + width), - image_height * (y + height), - ], - axis=-1, - ) - - -def _xyxy_no_op(boxes, images=None, image_shape=None): - return boxes - - -def _xyxy_to_xywh(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [left, top, right - left, bottom - top], - axis=-1, - ) - - -def _xyxy_to_rel_xywh(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - left, right = ( - left / image_width, - right / image_width, - ) - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [left, top, right - left, bottom - top], - axis=-1, - ) - - -def _xyxy_to_center_xywh(boxes, images=None, image_shape=None): - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate( - [ - (left + right) / 2.0, - (top + bottom) / 2.0, - right - left, - bottom - top, - ], - axis=-1, - ) - - -def _rel_xyxy_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left * image_width, right * image_width - top, bottom = top * image_height, bottom * image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _xyxy_to_rel_xyxy(boxes, images=None, 
image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left / image_width, right / image_width - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _yxyx_to_xyxy(boxes, images=None, image_shape=None): - y1, x1, y2, x2 = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([x1, y1, x2, y2], axis=-1) - - -def _rel_yxyx_to_xyxy(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - top, left, bottom, right = ops.split( - boxes, - ALL_AXES, - axis=-1, - ) - left, right = left * image_width, right * image_width - top, bottom = top * image_height, bottom * image_height - return ops.concatenate( - [left, top, right, bottom], - axis=-1, - ) - - -def _xyxy_to_yxyx(boxes, images=None, image_shape=None): - x1, y1, x2, y2 = ops.split(boxes, ALL_AXES, axis=-1) - return ops.concatenate([y1, x1, y2, x2], axis=-1) - - -def _xyxy_to_rel_yxyx(boxes, images=None, image_shape=None): - image_height, image_width = _image_shape(images, image_shape, boxes) - left, top, right, bottom = ops.split(boxes, ALL_AXES, axis=-1) - left, right = left / image_width, right / image_width - top, bottom = top / image_height, bottom / image_height - return ops.concatenate( - [top, left, bottom, right], - axis=-1, - ) - - -TO_XYXY_CONVERTERS = { - "xywh": _xywh_to_xyxy, - "center_xywh": _center_xywh_to_xyxy, - "center_yxhw": _center_yxhw_to_xyxy, - "rel_xywh": _rel_xywh_to_xyxy, - "xyxy": _xyxy_no_op, - "rel_xyxy": _rel_xyxy_to_xyxy, - "yxyx": _yxyx_to_xyxy, - "rel_yxyx": _rel_yxyx_to_xyxy, -} - -FROM_XYXY_CONVERTERS = { - "xywh": _xyxy_to_xywh, - "center_xywh": _xyxy_to_center_xywh, - "center_yxhw": _xyxy_to_center_yxhw, - "rel_xywh": _xyxy_to_rel_xywh, - "xyxy": _xyxy_no_op, - "rel_xyxy": _xyxy_to_rel_xyxy, - "yxyx": _xyxy_to_yxyx, - "rel_yxyx": _xyxy_to_rel_yxyx, -} - - -@keras_hub_export("keras_hub.bounding_box.convert_format") -def convert_format( - boxes, source, target, images=None, image_shape=None, dtype="float32" -): - f"""Converts bounding_boxes from one format to another. - - Supported formats are: - - `"xyxy"`, also known as `corners` format. In this format the first four - axes represent `[left, top, right, bottom]` in that order. - - `"rel_xyxy"`. In this format, the axes are the same as `"xyxy"` but the x - coordinates are normalized using the image width, and the y axes the - image height. All values in `rel_xyxy` are in the range `(0, 1)`. - - `"xywh"`. In this format the first four axes represent - `[left, top, width, height]`. - - `"rel_xywh". In this format the first four axes represent - [left, top, width, height], just like `"xywh"`. Unlike `"xywh"`, the - values are in the range (0, 1) instead of absolute pixel values. - - `"center_xyWH"`. In this format the first two coordinates represent the x - and y coordinates of the center of the bounding box, while the last two - represent the width and height of the bounding box. - - `"center_yxHW"`. In this format the first two coordinates represent the y - and x coordinates of the center of the bounding box, while the last two - represent the height and width of the bounding box. - - `"yxyx"`. In this format the first four axes represent - [top, left, bottom, right] in that order. - - `"rel_yxyx"`. 
In this format, the axes are the same as `"yxyx"` but the x - coordinates are normalized using the image width, and the y axes the - image height. All values in `rel_yxyx` are in the range (0, 1). - Formats are case insensitive. It is recommended that you capitalize width - and height to maximize the visual difference between `"xyWH"` and `"xyxy"`. - - Relative formats, abbreviated `rel`, make use of the shapes of the `images` - passed. In these formats, the coordinates, widths, and heights are all - specified as percentages of the host image. `images` may be a ragged - Tensor. Note that using a ragged Tensor for images may cause a substantial - performance loss, as each image will need to be processed separately due to - the mismatching image shapes. - - Example: - - ```python - boxes = load_coco_dataset() - boxes_in_xywh = keras_hub.bounding_box.convert_format( - boxes, - source='xyxy', - target='xyWH' - ) - ``` - - Args: - boxes: tensor representing bounding boxes in the format specified in - the `source` parameter. `boxes` can optionally have extra - dimensions stacked on the final axis to store metadata. boxes - should be a 3D tensor, with the shape `[batch_size, num_boxes, 4]`. - Alternatively, boxes can be a dictionary with key 'boxes' containing - a tensor matching the aforementioned spec. - source:One of {" ".join([f'"{f}"' for f in TO_XYXY_CONVERTERS.keys()])}. - Used to specify the original format of the `boxes` parameter. - target:One of {" ".join([f'"{f}"' for f in TO_XYXY_CONVERTERS.keys()])}. - Used to specify the destination format of the `boxes` parameter. - images: (Optional) a batch of images aligned with `boxes` on the first - axis. Should be at least 3 dimensions, with the first 3 dimensions - representing: `[batch_size, height, width]`. Used in some - converters to compute relative pixel values of the bounding box - dimensions. Required when transforming from a rel format to a - non-rel format. - dtype: the data type to use when transforming the boxes, defaults to - `"float32"`. - """ - if isinstance(boxes, dict): - converted_boxes = boxes.copy() - converted_boxes["boxes"] = convert_format( - boxes["boxes"], - source=source, - target=target, - images=images, - image_shape=image_shape, - dtype=dtype, - ) - return converted_boxes - - if boxes.shape[-1] is not None and boxes.shape[-1] != 4: - raise ValueError( - "Expected `boxes` to be a Tensor with a final dimension of " - f"`4`. Instead, got `boxes.shape={boxes.shape}`." - ) - if images is not None and image_shape is not None: - raise ValueError( - "convert_format() expects either `images` or `image_shape`, but " - f"not both. Received images={images} image_shape={image_shape}" - ) - - _validate_image_shape(image_shape) - - source = source.lower() - target = target.lower() - if source not in TO_XYXY_CONVERTERS: - raise ValueError( - "`convert_format()` received an unsupported format for the " - "argument `source`. `source` should be one of " - f"{TO_XYXY_CONVERTERS.keys()}. Got source={source}" - ) - if target not in FROM_XYXY_CONVERTERS: - raise ValueError( - "`convert_format()` received an unsupported format for the " - "argument `target`. `target` should be one of " - f"{FROM_XYXY_CONVERTERS.keys()}. 
Got target={target}" - ) - - boxes = ops.cast(boxes, dtype) - if source == target: - return boxes - - # rel->rel conversions should not require images - if source.startswith("rel") and target.startswith("rel"): - source = source.replace("rel_", "", 1) - target = target.replace("rel_", "", 1) - - boxes, images, squeeze = _format_inputs(boxes, images) - to_xyxy_fn = TO_XYXY_CONVERTERS[source] - from_xyxy_fn = FROM_XYXY_CONVERTERS[target] - - try: - in_xyxy = to_xyxy_fn(boxes, images=images, image_shape=image_shape) - result = from_xyxy_fn(in_xyxy, images=images, image_shape=image_shape) - except RequiresImagesException: - raise ValueError( - "convert_format() must receive `images` or `image_shape` when " - "transforming between relative and absolute formats." - f"convert_format() received source=`{format}`, target=`{format}, " - f"but images={images} and image_shape={image_shape}." - ) - - return _format_outputs(result, squeeze) - - -def _format_inputs(boxes, images): - boxes_rank = len(boxes.shape) - if boxes_rank > 3: - raise ValueError( - "Expected len(boxes.shape)=2, or len(boxes.shape)=3, got " - f"len(boxes.shape)={boxes_rank}" - ) - boxes_includes_batch = boxes_rank == 3 - # Determine if images needs an expand_dims() call - if images is not None: - images_rank = len(images.shape) - if images_rank > 4: - raise ValueError( - "Expected len(images.shape)=2, or len(images.shape)=3, got " - f"len(images.shape)={images_rank}" - ) - images_include_batch = images_rank == 4 - if boxes_includes_batch != images_include_batch: - raise ValueError( - "convert_format() expects both boxes and images to be batched, " - "or both boxes and images to be unbatched. Received " - f"len(boxes.shape)={boxes_rank}, " - f"len(images.shape)={images_rank}. Expected either " - "len(boxes.shape)=2 AND len(images.shape)=3, or " - "len(boxes.shape)=3 AND len(images.shape)=4." - ) - if not images_include_batch: - images = ops.expand_dims(images, axis=0) - - if not boxes_includes_batch: - return ops.expand_dims(boxes, axis=0), images, True - return boxes, images, False - - -def _validate_image_shape(image_shape): - # Escape early if image_shape is None and skip validation. - if image_shape is None: - return - # tuple/list - if isinstance(image_shape, (tuple, list)): - if len(image_shape) != 3: - raise ValueError( - "image_shape should be of length 3, but got " - f"image_shape={image_shape}" - ) - return - - # tensor - if ops.is_tensor(image_shape): - if len(image_shape.shape) > 1: - raise ValueError( - "image_shape.shape should be (3), but got " - f"image_shape.shape={image_shape.shape}" - ) - if image_shape.shape[0] != 3: - raise ValueError( - "image_shape.shape should be (3), but got " - f"image_shape.shape={image_shape.shape}" - ) - return - - # Warn about failure cases - raise ValueError( - "Expected image_shape to be either a tuple, list, Tensor. 
" - f"Received image_shape={image_shape}" - ) - - -def _format_outputs(boxes, squeeze): - if squeeze: - return ops.squeeze(boxes, axis=0) - return boxes - - -def _image_shape(images, image_shape, boxes): - if images is None and image_shape is None: - raise RequiresImagesException() - - if image_shape is None: - if not isinstance(images, tf.RaggedTensor): - image_shape = ops.shape(images) - height, width = image_shape[1], image_shape[2] - else: - height = ops.reshape(images.row_lengths(), (-1, 1)) - width = ops.reshape(ops.max(images.row_lengths(axis=2), 1), (-1, 1)) - height = ops.expand_dims(height, axis=-1) - width = ops.expand_dims(width, axis=-1) - else: - height, width = image_shape[0], image_shape[1] - return ops.cast(height, boxes.dtype), ops.cast(width, boxes.dtype) diff --git a/keras_hub/src/bounding_box/converters_test.py b/keras_hub/src/bounding_box/converters_test.py deleted file mode 100644 index 9617a2a2aa..0000000000 --- a/keras_hub/src/bounding_box/converters_test.py +++ /dev/null @@ -1,351 +0,0 @@ -import itertools - -import numpy as np -import pytest -import tensorflow as tf -from absl.testing import parameterized -from keras import backend - -from keras_hub.src.bounding_box import converters -from keras_hub.src.bounding_box import to_dense -from keras_hub.src.bounding_box import to_ragged -from keras_hub.src.tests.test_case import TestCase - - -class ConvertersTestCase(TestCase): - def setUp(self): - xyxy_box = np.array( - [[[10, 20, 110, 120], [20, 30, 120, 130]]], dtype="float32" - ) - yxyx_box = np.array( - [[[20, 10, 120, 110], [30, 20, 130, 120]]], dtype="float32" - ) - rel_xyxy_box = np.array( - [[[0.01, 0.02, 0.11, 0.12], [0.02, 0.03, 0.12, 0.13]]], - dtype="float32", - ) - rel_xyxy_box_ragged_images = np.array( - [[[0.10, 0.20, 1.1, 1.20], [0.40, 0.6, 2.40, 2.6]]], dtype="float32" - ) - rel_yxyx_box = np.array( - [[[0.02, 0.01, 0.12, 0.11], [0.03, 0.02, 0.13, 0.12]]], - dtype="float32", - ) - rel_yxyx_box_ragged_images = np.array( - [[[0.2, 0.1, 1.2, 1.1], [0.6, 0.4, 2.6, 2.4]]], dtype="float32" - ) - center_xywh_box = np.array( - [[[60, 70, 100, 100], [70, 80, 100, 100]]], dtype="float32" - ) - xywh_box = np.array( - [[[10, 20, 100, 100], [20, 30, 100, 100]]], dtype="float32" - ) - rel_xywh_box = np.array( - [[[0.01, 0.02, 0.1, 0.1], [0.02, 0.03, 0.1, 0.1]]], dtype="float32" - ) - rel_xywh_box_ragged_images = np.array( - [[[0.1, 0.2, 1, 1], [0.4, 0.6, 2, 2]]], dtype="float32" - ) - - self.ragged_images = tf.ragged.constant( - [ - np.ones(shape=[100, 100, 3]), - np.ones(shape=[50, 50, 3]), - ], # 2 images - ragged_rank=2, - ) - - self.images = np.ones([2, 1000, 1000, 3]) - - self.ragged_classes = tf.ragged.constant([[0], [0]], dtype="float32") - - self.boxes = { - "xyxy": xyxy_box, - "center_xywh": center_xywh_box, - "rel_xywh": rel_xywh_box, - "xywh": xywh_box, - "rel_xyxy": rel_xyxy_box, - "yxyx": yxyx_box, - "rel_yxyx": rel_yxyx_box, - } - - self.boxes_ragged_images = { - "xyxy": xyxy_box, - "center_xywh": center_xywh_box, - "rel_xywh": rel_xywh_box_ragged_images, - "xywh": xywh_box, - "rel_xyxy": rel_xyxy_box_ragged_images, - "yxyx": yxyx_box, - "rel_yxyx": rel_yxyx_box_ragged_images, - } - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - def test_converters(self, source, target): - source, target - source_box = 
self.boxes[source] - target_box = self.boxes[target] - - self.assertAllClose( - converters.convert_format( - source_box, source=source, target=target, images=self.images - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_converters_ragged_images(self, source, target): - source_box = _raggify(self.boxes_ragged_images[source]) - target_box = _raggify(self.boxes_ragged_images[target]) - self.assertAllClose( - converters.convert_format( - source_box, - source=source, - target=target, - images=self.ragged_images, - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - def test_converters_unbatched(self, source, target): - source_box = self.boxes[source][0] - target_box = self.boxes[target][0] - - self.assertAllClose( - converters.convert_format( - source_box, source=source, target=target, images=self.images[0] - ), - target_box, - ) - - def test_raises_with_different_image_rank(self): - source_box = self.boxes["xyxy"][0] - with self.assertRaises(ValueError): - converters.convert_format( - source_box, source="xyxy", target="xywh", images=self.images - ) - - def test_without_images(self): - source_box = self.boxes["xyxy"] - target_box = self.boxes["xywh"] - self.assertAllClose( - converters.convert_format(source_box, source="xyxy", target="xywh"), - target_box, - ) - - def test_rel_to_rel_without_images(self): - source_box = self.boxes["rel_xyxy"] - target_box = self.boxes["rel_yxyx"] - self.assertAllClose( - converters.convert_format( - source_box, source="rel_xyxy", target="rel_yxyx" - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_ragged_bounding_box(self, source, target): - source_box = _raggify(self.boxes[source]) - target_box = _raggify(self.boxes[target]) - self.assertAllClose( - converters.convert_format( - source_box, source=source, target=target, images=self.images - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_ragged_bounding_box_ragged_images(self, source, target): - source_box = _raggify(self.boxes_ragged_images[source]) - target_box = _raggify(self.boxes_ragged_images[target]) - self.assertAllClose( - converters.convert_format( - source_box, - source=source, - target=target, - 
images=self.ragged_images, - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_ragged_bounding_box_with_image_shape(self, source, target): - source_box = _raggify(self.boxes[source]) - target_box = _raggify(self.boxes[target]) - self.assertAllClose( - converters.convert_format( - source_box, - source=source, - target=target, - image_shape=(1000, 1000, 3), - ), - target_box, - ) - - @parameterized.named_parameters( - *[ - (f"{source}_{target}", source, target) - for (source, target) in itertools.permutations( - [ - "xyxy", - "center_xywh", - "rel_xywh", - "xywh", - "rel_xyxy", - "yxyx", - "rel_yxyx", - ], - 2, - ) - ] - + [("xyxy_xyxy", "xyxy", "xyxy")] - ) - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_dense_bounding_box_with_ragged_images(self, source, target): - source_box = _raggify(self.boxes_ragged_images[source]) - target_box = _raggify(self.boxes_ragged_images[target]) - source_bounding_boxes = { - "boxes": source_box, - "classes": self.ragged_classes, - } - source_bounding_boxes = to_dense.to_dense(source_bounding_boxes) - - result_bounding_boxes = converters.convert_format( - source_bounding_boxes, - source=source, - target=target, - images=self.ragged_images, - ) - result_bounding_boxes = to_ragged.to_ragged(result_bounding_boxes) - - self.assertAllClose( - result_bounding_boxes["boxes"], - target_box, - ) - - -def _raggify(tensor): - tensor = tf.squeeze(tensor, axis=0) - tensor = tf.RaggedTensor.from_row_lengths(tensor, [1, 1]) - return tensor diff --git a/keras_hub/src/bounding_box/formats.py b/keras_hub/src/bounding_box/formats.py deleted file mode 100644 index c8e50ab60a..0000000000 --- a/keras_hub/src/bounding_box/formats.py +++ /dev/null @@ -1,149 +0,0 @@ -""" -formats.py contains axis information for each supported format. -""" - -from keras_hub.src.api_export import keras_hub_export - - -@keras_hub_export("keras_hub.bounding_box.XYXY") -class XYXY: - """XYXY contains axis indices for the XYXY format. - - All values in the XYXY format should be absolute pixel values. - - The XYXY format consists of the following required indices: - - - LEFT: left of the bounding box - - TOP: top of the bounding box - - RIGHT: right of the bounding box - - BOTTOM: bottom of the bounding box - """ - - LEFT = 0 - TOP = 1 - RIGHT = 2 - BOTTOM = 3 - - -@keras_hub_export("keras_hub.bounding_box.REL_XYXY") -class REL_XYXY: - """REL_XYXY contains axis indices for the REL_XYXY format. - - REL_XYXY is like XYXY, but each value is relative to the width and height of - the origin image. Values are percentages of the origin images' width and - height respectively. - - The REL_XYXY format consists of the following required indices: - - - LEFT: left of the bounding box - - TOP: top of the bounding box - - RIGHT: right of the bounding box - - BOTTOM: bottom of the bounding box - """ - - LEFT = 0 - TOP = 1 - RIGHT = 2 - BOTTOM = 3 - - -@keras_hub_export("keras_hub.bounding_box.CENTER_XYWH") -class CENTER_XYWH: - """CENTER_XYWH contains axis indices for the CENTER_XYWH format. 
- - All values in the CENTER_XYWH format should be absolute pixel values. - - The CENTER_XYWH format consists of the following required indices: - - - X: X coordinate of the center of the bounding box - - Y: Y coordinate of the center of the bounding box - - WIDTH: width of the bounding box - - HEIGHT: height of the bounding box - """ - - X = 0 - Y = 1 - WIDTH = 2 - HEIGHT = 3 - - -@keras_hub_export("keras_hub.bounding_box.XYWH") -class XYWH: - """XYWH contains axis indices for the XYWH format. - - All values in the XYWH format should be absolute pixel values. - - The XYWH format consists of the following required indices: - - - X: X coordinate of the left of the bounding box - - Y: Y coordinate of the top of the bounding box - - WIDTH: width of the bounding box - - HEIGHT: height of the bounding box - """ - - X = 0 - Y = 1 - WIDTH = 2 - HEIGHT = 3 - - -@keras_hub_export("keras_hub.bounding_box.REL_XYWH") -class REL_XYWH: - """REL_XYWH contains axis indices for the XYWH format. - - REL_XYXY is like XYWH, but each value is relative to the width and height of - the origin image. Values are percentages of the origin images' width and - height respectively. - - - X: X coordinate of the left of the bounding box - - Y: Y coordinate of the top of the bounding box - - WIDTH: width of the bounding box - - HEIGHT: height of the bounding box - """ - - X = 0 - Y = 1 - WIDTH = 2 - HEIGHT = 3 - - -@keras_hub_export("keras_hub.bounding_box.YXYX") -class YXYX: - """YXYX contains axis indices for the YXYX format. - - All values in the YXYX format should be absolute pixel values. - - The YXYX format consists of the following required indices: - - - TOP: top of the bounding box - - LEFT: left of the bounding box - - BOTTOM: bottom of the bounding box - - RIGHT: right of the bounding box - """ - - TOP = 0 - LEFT = 1 - BOTTOM = 2 - RIGHT = 3 - - -@keras_hub_export("keras_hub.bounding_box.REL_YXYX") -class REL_YXYX: - """REL_YXYX contains axis indices for the REL_YXYX format. - - REL_YXYX is like YXYX, but each value is relative to the width and height of - the origin image. Values are percentages of the origin images' width and - height respectively. - - The REL_YXYX format consists of the following required indices: - - - TOP: top of the bounding box - - LEFT: left of the bounding box - - BOTTOM: bottom of the bounding box - - RIGHT: right of the bounding box - """ - - TOP = 0 - LEFT = 1 - BOTTOM = 2 - RIGHT = 3 diff --git a/keras_hub/src/bounding_box/iou.py b/keras_hub/src/bounding_box/iou.py deleted file mode 100644 index df2c907e4a..0000000000 --- a/keras_hub/src/bounding_box/iou.py +++ /dev/null @@ -1,251 +0,0 @@ -"""Contains functions to compute ious of bounding boxes.""" - -import math - -import keras -from keras import ops - -from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.bounding_box.converters import convert_format -from keras_hub.src.bounding_box.utils import as_relative -from keras_hub.src.bounding_box.utils import is_relative - - -def _compute_area(box): - """Computes area for bounding boxes - - Args: - box: [N, 4] or [batch_size, N, 4] float Tensor, either batched - or unbatched boxes. - Returns: - a float Tensor of [N] or [batch_size, N] - """ - y_min, x_min, y_max, x_max = ops.split(box[..., :4], 4, axis=-1) - return ops.squeeze((y_max - y_min) * (x_max - x_min), axis=-1) - - -def _compute_intersection(boxes1, boxes2): - """Computes intersection area between two sets of boxes. - - Args: - boxes1: [N, 4] or [batch_size, N, 4] float Tensor boxes. 
- boxes2: [M, 4] or [batch_size, M, 4] float Tensor boxes. - Returns: - a [N, M] or [batch_size, N, M] float Tensor. - """ - y_min1, x_min1, y_max1, x_max1 = ops.split(boxes1[..., :4], 4, axis=-1) - y_min2, x_min2, y_max2, x_max2 = ops.split(boxes2[..., :4], 4, axis=-1) - boxes2_rank = len(boxes2.shape) - perm = [1, 0] if boxes2_rank == 2 else [0, 2, 1] - # [N, M] or [batch_size, N, M] - intersect_ymax = ops.minimum(y_max1, ops.transpose(y_max2, perm)) - intersect_ymin = ops.maximum(y_min1, ops.transpose(y_min2, perm)) - intersect_xmax = ops.minimum(x_max1, ops.transpose(x_max2, perm)) - intersect_xmin = ops.maximum(x_min1, ops.transpose(x_min2, perm)) - - intersect_height = intersect_ymax - intersect_ymin - intersect_width = intersect_xmax - intersect_xmin - zeros_t = ops.cast(0, intersect_height.dtype) - intersect_height = ops.maximum(zeros_t, intersect_height) - intersect_width = ops.maximum(zeros_t, intersect_width) - - return intersect_height * intersect_width - - -@keras_hub_export("keras_hub.bounding_box.compute_iou") -def compute_iou( - boxes1, - boxes2, - bounding_box_format, - use_masking=False, - mask_val=-1, - images=None, - image_shape=None, -): - """Computes a lookup table vector containing the ious for a given set boxes. - - The lookup vector is to be indexed by [`boxes1_index`,`boxes2_index`] if - boxes are unbatched and by [`batch`, `boxes1_index`,`boxes2_index`] if the - boxes are batched. - - The users can pass `boxes1` and `boxes2` to be different ranks. For example: - 1) `boxes1`: [batch_size, M, 4], `boxes2`: [batch_size, N, 4] -> return - [batch_size, M, N]. - 2) `boxes1`: [batch_size, M, 4], `boxes2`: [N, 4] -> return - [batch_size, M, N] - 3) `boxes1`: [M, 4], `boxes2`: [batch_size, N, 4] -> return - [batch_size, M, N] - 4) `boxes1`: [M, 4], `boxes2`: [N, 4] -> return [M, N] - - Args: - boxes1: a list of bounding boxes in 'corners' format. Can be batched or - unbatched. - boxes2: a list of bounding boxes in 'corners' format. Can be batched or - unbatched. - bounding_box_format: a case-insensitive string which is one of `"xyxy"`, - `"rel_xyxy"`, `"xyWH"`, `"center_xyWH"`, `"yxyx"`, `"rel_yxyx"`. - For detailed information on the supported format, see the - [KerasCV bounding box documentation](https://keras.io/api/keras_cv/bounding_box/formats/). - use_masking: whether masking will be applied. This will mask all `boxes1` - or `boxes2` that have values less than 0 in all its 4 dimensions. - Default to `False`. - mask_val: int to mask those returned IOUs if the masking is True, defaults - to -1. - - Returns: - iou_lookup_table: a vector containing the pairwise ious of boxes1 and - boxes2. - """ # noqa: E501 - - boxes1_rank = len(boxes1.shape) - boxes2_rank = len(boxes2.shape) - - if boxes1_rank not in [2, 3]: - raise ValueError( - "compute_iou() expects boxes1 to be batched, or to be unbatched. " - f"Received len(boxes1.shape)={boxes1_rank}, " - f"len(boxes2.shape)={boxes2_rank}. Expected either " - "len(boxes1.shape)=2 AND or len(boxes1.shape)=3." - ) - if boxes2_rank not in [2, 3]: - raise ValueError( - "compute_iou() expects boxes2 to be batched, or to be unbatched. " - f"Received len(boxes1.shape)={boxes1_rank}, " - f"len(boxes2.shape)={boxes2_rank}. Expected either " - "len(boxes2.shape)=2 AND or len(boxes2.shape)=3." 
- ) - - target_format = "yxyx" - if is_relative(bounding_box_format): - target_format = as_relative(target_format) - - boxes1 = convert_format( - boxes1, - source=bounding_box_format, - target=target_format, - images=images, - image_shape=image_shape, - ) - - boxes2 = convert_format( - boxes2, - source=bounding_box_format, - target=target_format, - images=images, - image_shape=image_shape, - ) - - intersect_area = _compute_intersection(boxes1, boxes2) - boxes1_area = _compute_area(boxes1) - boxes2_area = _compute_area(boxes2) - boxes2_area_rank = len(boxes2_area.shape) - boxes2_axis = 1 if (boxes2_area_rank == 2) else 0 - boxes1_area = ops.expand_dims(boxes1_area, axis=-1) - boxes2_area = ops.expand_dims(boxes2_area, axis=boxes2_axis) - union_area = boxes1_area + boxes2_area - intersect_area - res = ops.divide(intersect_area, union_area + keras.backend.epsilon()) - - if boxes1_rank == 2: - perm = [1, 0] - else: - perm = [0, 2, 1] - - if not use_masking: - return res - - mask_val_t = ops.cast(mask_val, res.dtype) * ops.ones_like(res) - boxes1_mask = ops.less(ops.max(boxes1, axis=-1, keepdims=True), 0.0) - boxes2_mask = ops.less(ops.max(boxes2, axis=-1, keepdims=True), 0.0) - background_mask = ops.logical_or( - boxes1_mask, ops.transpose(boxes2_mask, perm) - ) - iou_lookup_table = ops.where(background_mask, mask_val_t, res) - return iou_lookup_table - - -@keras_hub_export("keras_hub.bounding_box.compute_ciou") -def compute_ciou(boxes1, boxes2, bounding_box_format): - """ - Computes the Complete IoU (CIoU) between two bounding boxes or between - two batches of bounding boxes. - - CIoU loss is an extension of GIoU loss, which further improves the IoU - optimization for object detection. CIoU loss not only penalizes the - bounding box coordinates but also considers the aspect ratio and center - distance of the boxes. The length of the last dimension should be 4 to - represent the bounding boxes. - - Args: - box1 (tensor): tensor representing the first bounding box with - shape (..., 4). - box2 (tensor): tensor representing the second bounding box with - shape (..., 4). - bounding_box_format: a case-insensitive string (for example, "xyxy"). - Each bounding box is defined by these 4 values. For detailed - information on the supported formats, see the [KerasCV bounding box - documentation](https://keras.io/api/keras_cv/bounding_box/formats/). - - Returns: - tensor: The CIoU distance between the two bounding boxes. 
- """ - target_format = "xyxy" - if is_relative(bounding_box_format): - target_format = as_relative(target_format) - - boxes1 = convert_format( - boxes1, source=bounding_box_format, target=target_format - ) - - boxes2 = convert_format( - boxes2, source=bounding_box_format, target=target_format - ) - - x_min1, y_min1, x_max1, y_max1 = ops.split(boxes1[..., :4], 4, axis=-1) - x_min2, y_min2, x_max2, y_max2 = ops.split(boxes2[..., :4], 4, axis=-1) - - width_1 = x_max1 - x_min1 - height_1 = y_max1 - y_min1 + keras.backend.epsilon() - width_2 = x_max2 - x_min2 - height_2 = y_max2 - y_min2 + keras.backend.epsilon() - - intersection_area = ops.maximum( - ops.minimum(x_max1, x_max2) - ops.maximum(x_min1, x_min2), 0 - ) * ops.maximum( - ops.minimum(y_max1, y_max2) - ops.maximum(y_min1, y_min2), 0 - ) - union_area = ( - width_1 * height_1 - + width_2 * height_2 - - intersection_area - + keras.backend.epsilon() - ) - iou = ops.squeeze( - ops.divide(intersection_area, union_area + keras.backend.epsilon()), - axis=-1, - ) - - convex_width = ops.maximum(x_max1, x_max2) - ops.minimum(x_min1, x_min2) - convex_height = ops.maximum(y_max1, y_max2) - ops.minimum(y_min1, y_min2) - convex_diagonal_squared = ops.squeeze( - convex_width**2 + convex_height**2 + keras.backend.epsilon(), - axis=-1, - ) - centers_distance_squared = ops.squeeze( - ((x_min1 + x_max1) / 2 - (x_min2 + x_max2) / 2) ** 2 - + ((y_min1 + y_max1) / 2 - (y_min2 + y_max2) / 2) ** 2, - axis=-1, - ) - - v = ops.squeeze( - ops.power( - (4 / math.pi**2) - * (ops.arctan(width_2 / height_2) - ops.arctan(width_1 / height_1)), - 2, - ), - axis=-1, - ) - alpha = v / (v - iou + (1 + keras.backend.epsilon())) - - return iou - ( - centers_distance_squared / convex_diagonal_squared + v * alpha - ) diff --git a/keras_hub/src/bounding_box/iou_test.py b/keras_hub/src/bounding_box/iou_test.py deleted file mode 100644 index 2e00f24869..0000000000 --- a/keras_hub/src/bounding_box/iou_test.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Tests for iou functions.""" - -import numpy as np - -from keras_hub.src.bounding_box import iou as iou_lib -from keras_hub.src.tests.test_case import TestCase - - -class IoUTest(TestCase): - def test_compute_single_iou(self): - bb1 = np.array([[100, 101, 200, 201]]) - bb1_off_by_1 = np.array([[101, 102, 201, 202]]) - # area of bb1 and bb1_off_by_1 are each 10000. 
- # intersection area is 99*99=9801 - # iou=9801/(2*10000 - 9801)=0.96097656633 - self.assertAllClose( - iou_lib.compute_iou(bb1, bb1_off_by_1, "yxyx")[0], [0.96097656633] - ) - - def test_compute_iou(self): - bb1 = [100, 101, 200, 201] - bb1_off_by_1_pred = [101, 102, 201, 202] - iou_bb1_bb1_off = 0.96097656633 - top_left_bounding_box = [0, 2, 1, 3] - far_away_box = [1300, 1400, 1500, 1401] - another_far_away_pred = [1000, 1400, 1200, 1401] - - # Rows represent predictions, columns ground truths - expected_result = np.array( - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - dtype=np.float32, - ) - - sample_y_true = np.array([bb1, top_left_bounding_box, far_away_box]) - sample_y_pred = np.array( - [bb1_off_by_1_pred, top_left_bounding_box, another_far_away_pred], - ) - - result = iou_lib.compute_iou(sample_y_true, sample_y_pred, "yxyx") - self.assertAllClose(expected_result, result) - - def test_batched_compute_iou(self): - bb1 = [100, 101, 200, 201] - bb1_off_by_1_pred = [101, 102, 201, 202] - iou_bb1_bb1_off = 0.96097656633 - top_left_bounding_box = [0, 2, 1, 3] - far_away_box = [1300, 1400, 1500, 1401] - another_far_away_pred = [1000, 1400, 1200, 1401] - - # Rows represent predictions, columns ground truths - expected_result = np.array( - [ - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - ], - ) - - sample_y_true = np.array( - [ - [bb1, top_left_bounding_box, far_away_box], - [bb1, top_left_bounding_box, far_away_box], - ], - ) - sample_y_pred = np.array( - [ - [ - bb1_off_by_1_pred, - top_left_bounding_box, - another_far_away_pred, - ], - [ - bb1_off_by_1_pred, - top_left_bounding_box, - another_far_away_pred, - ], - ], - ) - - result = iou_lib.compute_iou(sample_y_true, sample_y_pred, "yxyx") - self.assertAllClose(expected_result, result) - - def test_batched_boxes1_unbatched_boxes2(self): - bb1 = [100, 101, 200, 201] - bb1_off_by_1_pred = [101, 102, 201, 202] - iou_bb1_bb1_off = 0.96097656633 - top_left_bounding_box = [0, 2, 1, 3] - far_away_box = [1300, 1400, 1500, 1401] - another_far_away_pred = [1000, 1400, 1200, 1401] - - # Rows represent predictions, columns ground truths - expected_result = np.array( - [ - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - ], - ) - - sample_y_true = np.array( - [ - [bb1, top_left_bounding_box, far_away_box], - [bb1, top_left_bounding_box, far_away_box], - ], - ) - sample_y_pred = np.array( - [bb1_off_by_1_pred, top_left_bounding_box, another_far_away_pred], - ) - - result = iou_lib.compute_iou(sample_y_true, sample_y_pred, "yxyx") - self.assertAllClose(expected_result, result) - - def test_unbatched_boxes1_batched_boxes2(self): - bb1 = [100, 101, 200, 201] - bb1_off_by_1_pred = [101, 102, 201, 202] - iou_bb1_bb1_off = 0.96097656633 - top_left_bounding_box = [0, 2, 1, 3] - far_away_box = [1300, 1400, 1500, 1401] - another_far_away_pred = [1000, 1400, 1200, 1401] - - # Rows represent predictions, columns ground truths - expected_result = np.array( - [ - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - [[iou_bb1_bb1_off, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 0.0]], - ], - ) - - sample_y_true = np.array( - [ - [bb1, top_left_bounding_box, far_away_box], - ], - ) - sample_y_pred = np.array( - [ - [ - bb1_off_by_1_pred, - top_left_bounding_box, - another_far_away_pred, - ], - [ - bb1_off_by_1_pred, - top_left_bounding_box, - 
another_far_away_pred, - ], - ], - ) - - result = iou_lib.compute_iou(sample_y_true, sample_y_pred, "yxyx") - self.assertAllClose(expected_result, result) diff --git a/keras_hub/src/bounding_box/to_dense.py b/keras_hub/src/bounding_box/to_dense.py deleted file mode 100644 index 68b00d065f..0000000000 --- a/keras_hub/src/bounding_box/to_dense.py +++ /dev/null @@ -1,81 +0,0 @@ -import keras_hub.src.bounding_box.validate_format as validate_format -from keras_hub.src.api_export import keras_hub_export - -try: - import tensorflow as tf -except ImportError: - tf = None - - -def _box_shape(batched, boxes_shape, max_boxes): - # ensure we dont drop the final axis in RaggedTensor mode - if max_boxes is None: - shape = list(boxes_shape) - shape[-1] = 4 - return shape - if batched: - return [None, max_boxes, 4] - return [max_boxes, 4] - - -def _classes_shape(batched, classes_shape, max_boxes): - if max_boxes is None: - return None - if batched: - return [None, max_boxes] + classes_shape[2:] - return [max_boxes] + classes_shape[2:] - - -@keras_hub_export("keras_hub.bounding_box.to_dense") -def to_dense(bounding_boxes, max_boxes=None, default_value=-1): - """to_dense converts bounding boxes to Dense tensors - - Args: - bounding_boxes: bounding boxes in KerasCV dictionary format. - max_boxes: the maximum number of boxes, used to pad tensors to a given - shape. This can be used to make object detection pipelines TPU - compatible. - default_value: the default value to pad bounding boxes with. defaults - to -1. - """ - info = validate_format.validate_format(bounding_boxes) - - # guards against errors in metrics regarding modification of inputs. - # also guards against unexpected behavior when modifying downstream - bounding_boxes = bounding_boxes.copy() - - # Already running in masked mode - if not info["ragged"]: - # even if already ragged, still copy the dictionary for API consistency - return bounding_boxes - - if isinstance(bounding_boxes["classes"], tf.RaggedTensor): - bounding_boxes["classes"] = bounding_boxes["classes"].to_tensor( - default_value=default_value, - shape=_classes_shape( - info["is_batched"], bounding_boxes["classes"].shape, max_boxes - ), - ) - - if isinstance(bounding_boxes["boxes"], tf.RaggedTensor): - bounding_boxes["boxes"] = bounding_boxes["boxes"].to_tensor( - default_value=default_value, - shape=_box_shape( - info["is_batched"], bounding_boxes["boxes"].shape, max_boxes - ), - ) - - if "confidence" in bounding_boxes: - if isinstance(bounding_boxes["confidence"], tf.RaggedTensor): - bounding_boxes["confidence"] = bounding_boxes[ - "confidence" - ].to_tensor( - default_value=default_value, - shape=_classes_shape( - info["is_batched"], - bounding_boxes["confidence"].shape, - max_boxes, - ), - ) - - return bounding_boxes diff --git a/keras_hub/src/bounding_box/to_dense_test.py b/keras_hub/src/bounding_box/to_dense_test.py deleted file mode 100644 index 91acb8137a..0000000000 --- a/keras_hub/src/bounding_box/to_dense_test.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest -import tensorflow as tf -from keras import backend - -from keras_hub.src.bounding_box import to_dense -from keras_hub.src.tests.test_case import TestCase - - -class ToDenseTest(TestCase): - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_converts_to_dense(self): - bounding_boxes = { - "boxes": tf.ragged.constant( - [[[0, 0, 1, 1]], [[0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 1, 1]]] - ), - "classes": tf.ragged.constant([[0], [1, 2, 3]]), - } - 
bounding_boxes = to_dense.to_dense(bounding_boxes) - self.assertEqual(bounding_boxes["boxes"].shape, [2, 3, 4]) - self.assertEqual(bounding_boxes["classes"].shape, [2, 3]) diff --git a/keras_hub/src/bounding_box/to_ragged.py b/keras_hub/src/bounding_box/to_ragged.py deleted file mode 100644 index f86712dd35..0000000000 --- a/keras_hub/src/bounding_box/to_ragged.py +++ /dev/null @@ -1,86 +0,0 @@ -import keras - -import keras_hub.src.bounding_box.validate_format as validate_format -from keras_hub.src.api_export import keras_hub_export - -try: - import tensorflow as tf -except ImportError: - tf = None - - -@keras_hub_export("keras_hub.bounding_box.to_ragged") -def to_ragged(bounding_boxes, sentinel=-1, dtype="float32"): - """converts a Dense padded bounding box `tf.Tensor` to a `tf.RaggedTensor`. - - Bounding boxes are ragged tensors in most use cases. Converting them to a - dense tensor makes it easier to work with Tensorflow ecosystem. - This function can be used to filter out the masked out bounding boxes by - checking for padded sentinel value of the class_id axis of the - bounding_boxes. - - Example: - ```python - bounding_boxes = { - "boxes": tf.constant([[2, 3, 4, 5], [0, 1, 2, 3]]), - "classes": tf.constant([[-1, 1]]), - } - bounding_boxes = bounding_box.to_ragged(bounding_boxes) - print(bounding_boxes) - # { - # "boxes": [[0, 1, 2, 3]], - # "classes": [[1]] - # } - ``` - - Args: - bounding_boxes: a Tensor of bounding boxes. May be batched, or - unbatched. - sentinel: The value indicating that a bounding box does not exist at the - current index, and the corresponding box is padding, defaults to -1. - dtype: the data type to use for the underlying Tensors. - Returns: - dictionary of `tf.RaggedTensor` or 'tf.Tensor' containing the filtered - bounding boxes. - """ - if keras.config.backend() != "tensorflow": - raise NotImplementedError( - "`bounding_box.to_ragged` was called using a backend which does " - "not support ragged tensors. " - f"Current backend: {keras.backend.backend()}." 
- ) - - info = validate_format.validate_format(bounding_boxes) - - if info["ragged"]: - return bounding_boxes - - boxes = bounding_boxes.get("boxes") - classes = bounding_boxes.get("classes") - confidence = bounding_boxes.get("confidence", None) - - mask = classes != sentinel - - boxes = tf.ragged.boolean_mask(boxes, mask) - classes = tf.ragged.boolean_mask(classes, mask) - if confidence is not None: - confidence = tf.ragged.boolean_mask(confidence, mask) - - if isinstance(boxes, tf.Tensor): - boxes = tf.RaggedTensor.from_tensor(boxes) - - if isinstance(classes, tf.Tensor) and len(classes.shape) > 1: - classes = tf.RaggedTensor.from_tensor(classes) - - if confidence is not None: - if isinstance(confidence, tf.Tensor) and len(confidence.shape) > 1: - confidence = tf.RaggedTensor.from_tensor(confidence) - - result = bounding_boxes.copy() - result["boxes"] = tf.cast(boxes, dtype) - result["classes"] = tf.cast(classes, dtype) - - if confidence is not None: - result["confidence"] = tf.cast(confidence, dtype) - - return result diff --git a/keras_hub/src/bounding_box/to_ragged_test.py b/keras_hub/src/bounding_box/to_ragged_test.py deleted file mode 100644 index 9b76866ddc..0000000000 --- a/keras_hub/src/bounding_box/to_ragged_test.py +++ /dev/null @@ -1,87 +0,0 @@ -import numpy as np -import pytest -from keras import backend - -from keras_hub.src.bounding_box import to_dense -from keras_hub.src.bounding_box import to_ragged -from keras_hub.src.tests.test_case import TestCase - - -class ToRaggedTest(TestCase): - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_converts_to_ragged(self): - bounding_boxes = { - "boxes": np.array( - [[[0, 0, 0, 0], [0, 0, 0, 0]], [[2, 3, 4, 5], [0, 1, 2, 3]]] - ), - "classes": np.array([[-1, -1], [-1, 1]]), - "confidence": np.array([[0.5, 0.7], [0.23, 0.12]]), - } - bounding_boxes = to_ragged.to_ragged(bounding_boxes) - - self.assertEqual(bounding_boxes["boxes"][1].shape, [1, 4]) - self.assertEqual(bounding_boxes["classes"][1].shape, [1]) - self.assertEqual( - bounding_boxes["confidence"][1].shape, - [ - 1, - ], - ) - - self.assertEqual(bounding_boxes["classes"][0].shape, [0]) - self.assertEqual(bounding_boxes["boxes"][0].shape, [0, 4]) - self.assertEqual( - bounding_boxes["confidence"][0].shape, - [ - 0, - ], - ) - - @pytest.mark.skipif( - backend.backend() != "tensorflow", - reason="Only applies to backends which support raggeds", - ) - def test_round_trip(self): - original = { - "boxes": np.array( - [ - [[0, 0, 0, 0], [-1, -1, -1, -1]], - [[-1, -1, -1, -1], [-1, -1, -1, -1]], - ] - ), - "classes": np.array([[1, -1], [-1, -1]]), - "confidence": np.array([[0.5, -1], [-1, -1]]), - } - bounding_boxes = to_ragged.to_ragged(original) - bounding_boxes = to_dense.to_dense(bounding_boxes, max_boxes=2) - - self.assertEqual(bounding_boxes["boxes"][1].shape, [2, 4]) - self.assertEqual(bounding_boxes["classes"][1].shape, [2]) - self.assertEqual(bounding_boxes["classes"][0].shape, [2]) - self.assertEqual(bounding_boxes["boxes"][0].shape, [2, 4]) - self.assertEqual(bounding_boxes["confidence"][0].shape, [2]) - - self.assertAllEqual(bounding_boxes["boxes"], original["boxes"]) - self.assertAllEqual(bounding_boxes["classes"], original["classes"]) - self.assertAllEqual( - bounding_boxes["confidence"], original["confidence"] - ) - - @pytest.mark.skipif( - backend.backend() == "tensorflow", - reason="Only applies to backends which don't support raggeds", - ) - def 
test_backend_without_raggeds_throws(self): - bounding_boxes = { - "boxes": np.array( - [[[0, 0, 0, 0], [0, 0, 0, 0]], [[2, 3, 4, 5], [0, 1, 2, 3]]] - ), - "classes": np.array([[-1, -1], [-1, 1]]), - "confidence": np.array([[0.5, 0.7], [0.23, 0.12]]), - } - - with self.assertRaisesRegex(NotImplementedError, "support ragged"): - to_ragged.to_ragged(bounding_boxes) diff --git a/keras_hub/src/bounding_box/utils.py b/keras_hub/src/bounding_box/utils.py deleted file mode 100644 index ac4fe8d05b..0000000000 --- a/keras_hub/src/bounding_box/utils.py +++ /dev/null @@ -1,181 +0,0 @@ -"""Utility functions for working with bounding boxes.""" - -from keras import ops - -from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.bounding_box import converters -from keras_hub.src.bounding_box.formats import XYWH - - -@keras_hub_export("keras_hub.bounding_box.is_relative") -def is_relative(bounding_box_format): - """A util to check if a bounding box format uses relative coordinates""" - if bounding_box_format.lower() not in converters.TO_XYXY_CONVERTERS: - raise ValueError( - "`is_relative()` received an unsupported format for the argument " - f"`bounding_box_format`. `bounding_box_format` should be one of " - f"{converters.TO_XYXY_CONVERTERS.keys()}. " - f"Got bounding_box_format={bounding_box_format}" - ) - - return bounding_box_format.startswith("rel") - - -@keras_hub_export("keras_hub.bounding_box.as_relative") -def as_relative(bounding_box_format): - """A util to get the relative equivalent of a provided bounding box format. - - If the specified format is already a relative format, - it will be returned unchanged. - """ - - if not is_relative(bounding_box_format): - return "rel_" + bounding_box_format - - return bounding_box_format - - -def _relative_area(boxes, bounding_box_format): - boxes = converters.convert_format( - boxes, - source=bounding_box_format, - target="rel_xywh", - ) - widths = boxes[..., XYWH.WIDTH] - heights = boxes[..., XYWH.HEIGHT] - # handle corner case where shear performs a full inversion. - return ops.where( - ops.logical_and(widths > 0, heights > 0), widths * heights, 0.0 - ) - - -@keras_hub_export("keras_hub.bounding_box.clip_to_image") -def clip_to_image( - bounding_boxes, bounding_box_format, images=None, image_shape=None -): - """clips bounding boxes to image boundaries. - - `clip_to_image()` clips bounding boxes that have coordinates out of bounds - of an image down to the boundaries of the image. This is done by converting - the bounding box to relative formats, then clipping them to the `[0, 1]` - range. Additionally, bounding boxes that end up with a zero area have their - class ID set to -1, indicating that there is no object present in them. - - Args: - bounding_boxes: bounding box tensor to clip. - bounding_box_format: the KerasCV bounding box format the bounding boxes - are in. - images: list of images to clip the bounding boxes to. - image_shape: the shape of the images to clip the bounding boxes to. 
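For reviewers, a minimal sketch of how the deleted `clip_to_image` helper was called, mirroring the removed `utils_test.py` cases further down in this diff; it assumes the pre-removal `keras_hub.src.bounding_box` module is still importable.

```python
import numpy as np
from keras import ops

from keras_hub.src.bounding_box import utils

# Boxes that spill past a 256x256 image: the helper clips them to the image
# bounds and marks fully out-of-bounds boxes with class id -1.
bounding_boxes = {
    "boxes": np.array([[257.0, 257.0, 400.0, 400.0], [100.0, 100.0, 300.0, 300.0]]),
    "classes": np.array([0, 0]),
}
image = ops.ones(shape=(256, 256, 3))
clipped = utils.clip_to_image(
    bounding_boxes, bounding_box_format="xyxy", images=image
)
# clipped["boxes"]   -> [[-1, -1, -1, -1], [100, 100, 256, 256]]
# clipped["classes"] -> [-1, 0]
```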
- """ - boxes, classes = bounding_boxes["boxes"], bounding_boxes["classes"] - - boxes = converters.convert_format( - boxes, - source=bounding_box_format, - target="rel_xyxy", - images=images, - image_shape=image_shape, - ) - boxes, classes, images, squeeze = _format_inputs(boxes, classes, images) - x1, y1, x2, y2 = ops.split(boxes, 4, axis=-1) - clipped_bounding_boxes = ops.concatenate( - [ - ops.clip(x1, 0, 1), - ops.clip(y1, 0, 1), - ops.clip(x2, 0, 1), - ops.clip(y2, 0, 1), - ], - axis=-1, - ) - areas = _relative_area( - clipped_bounding_boxes, bounding_box_format="rel_xyxy" - ) - clipped_bounding_boxes = converters.convert_format( - clipped_bounding_boxes, - source="rel_xyxy", - target=bounding_box_format, - images=images, - image_shape=image_shape, - ) - clipped_bounding_boxes = ops.where( - ops.expand_dims(areas > 0.0, axis=-1), clipped_bounding_boxes, -1.0 - ) - classes = ops.where(areas > 0.0, classes, -1) - nan_indices = ops.any(ops.isnan(clipped_bounding_boxes), axis=-1) - classes = ops.where(nan_indices, -1, classes) - - # TODO update dict and return - clipped_bounding_boxes, classes = _format_outputs( - clipped_bounding_boxes, classes, squeeze - ) - - bounding_boxes.update({"boxes": clipped_bounding_boxes, "classes": classes}) - - return bounding_boxes - - -@keras_hub_export("keras_hub.bounding_box.clip_boxes") -def clip_boxes(boxes, image_shape): - """Clip boxes to the boundaries of the image shape""" - if boxes.shape[-1] != 4: - raise ValueError( - "boxes.shape[-1] is {:d}, but must be 4.".format(boxes.shape[-1]) - ) - - if isinstance(image_shape, list) or isinstance(image_shape, tuple): - height, width, _ = image_shape - max_length = ops.stack([height, width, height, width], axis=-1) - else: - image_shape = ops.cast(image_shape, dtype=boxes.dtype) - height = image_shape[0] - width = image_shape[1] - max_length = ops.stack([height, width, height, width], axis=-1) - - clipped_boxes = ops.maximum(ops.minimum(boxes, max_length), 0.0) - return clipped_boxes - - -def _format_inputs(boxes, classes, images): - boxes_rank = len(boxes.shape) - if boxes_rank > 3: - raise ValueError( - "Expected len(boxes.shape)=2, or len(boxes.shape)=3, got " - f"len(boxes.shape)={boxes_rank}" - ) - boxes_includes_batch = boxes_rank == 3 - # Determine if images needs an expand_dims() call - if images is not None: - images_rank = len(images.shape) - if images_rank > 4: - raise ValueError( - "Expected len(images.shape)=2, or len(images.shape)=3, got " - f"len(images.shape)={images_rank}" - ) - images_include_batch = images_rank == 4 - if boxes_includes_batch != images_include_batch: - raise ValueError( - "clip_to_image() expects both boxes and images to be batched, " - "or both boxes and images to be unbatched. Received " - f"len(boxes.shape)={boxes_rank}, " - f"len(images.shape)={images_rank}. Expected either " - "len(boxes.shape)=2 AND len(images.shape)=3, or " - "len(boxes.shape)=3 AND len(images.shape)=4." 
- ) - if not images_include_batch: - images = ops.expand_dims(images, axis=0) - - if not boxes_includes_batch: - return ( - ops.expand_dims(boxes, axis=0), - ops.expand_dims(classes, axis=0), - images, - True, - ) - return boxes, classes, images, False - - -def _format_outputs(boxes, classes, squeeze): - if squeeze: - return ops.squeeze(boxes, axis=0), ops.squeeze(classes, axis=0) - return boxes, classes diff --git a/keras_hub/src/bounding_box/utils_test.py b/keras_hub/src/bounding_box/utils_test.py deleted file mode 100644 index 40ad8e6e07..0000000000 --- a/keras_hub/src/bounding_box/utils_test.py +++ /dev/null @@ -1,155 +0,0 @@ -import numpy as np -from keras import ops - -from keras_hub.src.bounding_box import utils -from keras_hub.src.tests.test_case import TestCase - - -class BoundingBoxUtilTest(TestCase): - def test_clip_to_image_standard(self): - # Test xyxy format unbatched - height = 256 - width = 256 - bounding_boxes = { - "boxes": np.array([[200, 200, 400, 400], [100, 100, 300, 300]]), - "classes": np.array([0, 0]), - } - image = ops.ones(shape=(height, width, 3)) - bounding_boxes = utils.clip_to_image( - bounding_boxes, bounding_box_format="xyxy", images=image - ) - boxes = bounding_boxes["boxes"] - self.assertAllGreaterEqual(ops.convert_to_numpy(boxes), 0) - ( - x1, - y1, - x2, - y2, - ) = ops.split(boxes, 4, axis=1) - self.assertAllLessEqual( - ops.convert_to_numpy(ops.concatenate([x1, x2], axis=1)), width - ) - self.assertAllLessEqual( - ops.convert_to_numpy(ops.concatenate([y1, y2], axis=1)), height - ) - # Test relative format batched - image = ops.ones(shape=(1, height, width, 3)) - - bounding_boxes = { - "boxes": np.array([[[0.2, -1, 1.2, 0.3], [0.4, 1.5, 0.2, 0.3]]]), - "classes": np.array([[0, 0]]), - } - bounding_boxes = utils.clip_to_image( - bounding_boxes, bounding_box_format="rel_xyxy", images=image - ) - boxes = bounding_boxes["boxes"] - self.assertAllLessEqual(ops.convert_to_numpy(boxes), 1) - - def test_clip_to_image_filters_fully_out_bounding_boxes(self): - # Test xyxy format unbatched - height = 256 - width = 256 - bounding_boxes = { - "boxes": np.array([[257, 257, 400, 400], [100, 100, 300, 300]]), - "classes": np.array([0, 0]), - } - image = ops.ones(shape=(height, width, 3)) - bounding_boxes = utils.clip_to_image( - bounding_boxes, bounding_box_format="xyxy", images=image - ) - - ( - self.assertAllEqual( - bounding_boxes["boxes"], - np.array([[-1, -1, -1, -1], [100, 100, 256, 256]]), - ), - ) - self.assertAllEqual( - bounding_boxes["classes"], - np.array([-1, 0]), - ) - - def test_clip_to_image_filters_fully_out_bounding_boxes_negative_area(self): - # Test xyxy format unbatched - height = 256 - width = 256 - bounding_boxes = { - "boxes": np.array([[110, 120, 100, 100], [100, 100, 300, 300]]), - "classes": np.array([0, 0]), - } - image = ops.ones(shape=(height, width, 3)) - bounding_boxes = utils.clip_to_image( - bounding_boxes, bounding_box_format="xyxy", images=image - ) - self.assertAllEqual( - bounding_boxes["boxes"], - np.array( - [ - [ - -1, - -1, - -1, - -1, - ], - [ - 100, - 100, - 256, - 256, - ], - ] - ), - ) - self.assertAllEqual( - bounding_boxes["classes"], - np.array([-1, 0]), - ) - - def test_clip_to_image_filters_nans(self): - # Test xyxy format unbatched - height = 256 - width = 256 - bounding_boxes = { - "boxes": np.array( - [[0, float("NaN"), 100, 100], [100, 100, 300, 300]] - ), - "classes": np.array([0, 0]), - } - image = ops.ones(shape=(height, width, 3)) - bounding_boxes = utils.clip_to_image( - bounding_boxes, 
bounding_box_format="xyxy", images=image - ) - self.assertAllEqual( - bounding_boxes["boxes"], - np.array( - [ - [ - -1, - -1, - -1, - -1, - ], - [ - 100, - 100, - 256, - 256, - ], - ] - ), - ) - self.assertAllEqual( - bounding_boxes["classes"], - np.array([-1, 0]), - ) - - def test_is_relative_util(self): - self.assertTrue(utils.is_relative("rel_xyxy")) - self.assertFalse(utils.is_relative("xyxy")) - - with self.assertRaises(ValueError): - _ = utils.is_relative("bad_format") - - def test_as_relative_util(self): - self.assertEqual(utils.as_relative("yxyx"), "rel_yxyx") - self.assertEqual(utils.as_relative("rel_xywh"), "rel_xywh") diff --git a/keras_hub/src/bounding_box/validate_format.py b/keras_hub/src/bounding_box/validate_format.py deleted file mode 100644 index 8680dbb693..0000000000 --- a/keras_hub/src/bounding_box/validate_format.py +++ /dev/null @@ -1,85 +0,0 @@ -from keras_hub.src.api_export import keras_hub_export - -try: - import tensorflow as tf -except ImportError: - tf = None - - -@keras_hub_export("keras_hub.bounding_box.validate_format") -def validate_format(bounding_boxes, variable_name="bounding_boxes"): - """validates that a given set of bounding boxes complies with KerasHub - format. - - For a set of bounding boxes to be valid it must satisfy the following - conditions: - - `bounding_boxes` must be a dictionary - - contains keys `"boxes"` and `"classes"` - - each entry must have matching first two dimensions; representing the batch - axis and the number of boxes per image axis. - - either both `"boxes"` and `"classes"` are batched, or both are unbatched. - - Additionally, one of the following must be satisfied: - - `"boxes"` and `"classes"` are both Ragged - - `"boxes"` and `"classes"` are both Dense - - `"boxes"` and `"classes"` are unbatched - - Args: - bounding_boxes: dictionary of bounding boxes according to KerasCV - format. - - Raises: - ValueError if any of the above conditions are not met - """ - if not isinstance(bounding_boxes, dict): - raise ValueError( - f"Expected `{variable_name}` to be a dictionary, got " - f"`{variable_name}={bounding_boxes}`." - ) - if not all([x in bounding_boxes for x in ["boxes", "classes"]]): - raise ValueError( - f"Expected `{variable_name}` to be a dictionary containing keys " - "`'classes'` and `'boxes'`. Got " - f"`{variable_name}.keys()={bounding_boxes.keys()}`." - ) - - boxes = bounding_boxes.get("boxes") - classes = bounding_boxes.get("classes") - info = {} - - is_batched = len(boxes.shape) == 3 - info["is_batched"] = is_batched - info["ragged"] = isinstance(boxes, tf.RaggedTensor) - - if not is_batched: - if boxes.shape[:1] != classes.shape[:1]: - raise ValueError( - "Expected `boxes` and `classes` to have matching dimensions " - "on the first axis when operating in unbatched mode. Got " - f"`boxes.shape={boxes.shape}`, `classes.shape={classes.shape}`." - ) - - info["classes_one_hot"] = len(classes.shape) == 2 - # No Ragged checks needed in unbatched mode. - return info - - info["classes_one_hot"] = len(classes.shape) == 3 - - if isinstance(boxes, tf.RaggedTensor) != isinstance( - classes, tf.RaggedTensor - ): - raise ValueError( - "Either both `boxes` and `classes` " - "should be Ragged, or neither should be ragged." - f" Got `type(boxes)={type(boxes)}`, type(classes)={type(classes)}." - ) - - # Batched mode checks - if boxes.shape[:2] != classes.shape[:2]: - raise ValueError( - "Expected `boxes` and `classes` to have matching dimensions " - "on the first two axes when operating in batched mode. 
" - f"Got `boxes.shape={boxes.shape}`, `classes.shape={classes.shape}`." - ) - - return info diff --git a/keras_hub/src/bounding_box/validate_format_test.py b/keras_hub/src/bounding_box/validate_format_test.py deleted file mode 100644 index e2025e290a..0000000000 --- a/keras_hub/src/bounding_box/validate_format_test.py +++ /dev/null @@ -1,34 +0,0 @@ -import numpy as np - -from keras_hub.src.bounding_box import validate_format -from keras_hub.src.tests.test_case import TestCase - - -class ValidateTest(TestCase): - def test_raises_nondict(self): - with self.assertRaisesRegex( - ValueError, "Expected `bounding_boxes` to be a dictionary, got " - ): - validate_format.validate_format(np.ones((4, 3, 6))) - - def test_mismatch_dimensions(self): - with self.assertRaisesRegex( - ValueError, - "Expected `boxes` and `classes` to have matching dimensions", - ): - validate_format.validate_format( - {"boxes": np.ones((4, 3, 6)), "classes": np.ones((4, 6))} - ) - - def test_bad_keys(self): - with self.assertRaisesRegex(ValueError, "containing keys"): - validate_format.validate_format( - { - "box": [ - 1, - 2, - 3, - ], - "class": [1234], - } - ) diff --git a/keras_hub/src/models/retinanet/anchor_generator.py b/keras_hub/src/layers/modeling/anchor_generator.py similarity index 92% rename from keras_hub/src/models/retinanet/anchor_generator.py rename to keras_hub/src/layers/modeling/anchor_generator.py index a3c3800c49..418fe7b130 100644 --- a/keras_hub/src/models/retinanet/anchor_generator.py +++ b/keras_hub/src/layers/modeling/anchor_generator.py @@ -4,20 +4,16 @@ from keras import ops from keras_hub.src.api_export import keras_hub_export - -# TODO: https://github.com/keras-team/keras-hub/issues/1965 -from keras_hub.src.bounding_box.converters import convert_format +from keras_hub.src.utils.tensor_utils import assert_bounding_box_support @keras_hub_export("keras_hub.layers.AnchorGenerator") class AnchorGenerator(keras.layers.Layer): """Generates anchor boxes for object detection tasks. - This layer creates a set of anchor boxes (also known as default boxes or priors) for use in object detection models, particularly those utilizing Feature Pyramid Networks (FPN). It generates anchors across multiple pyramid levels, with various scales and aspect ratios. - Feature Pyramid Levels: - Levels typically range from 2 to 6 (P2 to P7), corresponding to different resolutions of the input image. 
@@ -56,7 +52,7 @@ class AnchorGenerator(keras.layers.Layer): Example: ```python - anchor_generator = AnchorGenerator( + anchor_generator = keras_hub.layers.AnchorGenerator( bounding_box_format='xyxy', min_level=3, max_level=7, @@ -78,6 +74,9 @@ def __init__( anchor_size, **kwargs, ): + # Check whether current version of keras support bounding box utils + assert_bounding_box_support(self.__class__.__name__) + super().__init__(**kwargs) self.bounding_box_format = bounding_box_format self.min_level = min_level @@ -94,29 +93,23 @@ def call(self, inputs): image_shape = images_shape[1:-1] else: image_shape = images_shape[:-1] - image_shape = tuple(image_shape) - multilevel_anchors = {} for level in range(self.min_level, self.max_level + 1): # Calculate the feature map size for this level feat_size_y = math.ceil(image_shape[0] / 2**level) feat_size_x = math.ceil(image_shape[1] / 2**level) - # Calculate the stride (step size) for this level stride_y = image_shape[0] // feat_size_y stride_x = image_shape[1] // feat_size_x - # Generate anchor center points # Start from stride/2 to center anchors on pixels cx = ops.arange(0, feat_size_x, dtype="float32") * stride_x cy = ops.arange(0, feat_size_y, dtype="float32") * stride_y - # Create a grid of anchor centers cy_grid, cx_grid = ops.meshgrid(cy, cx, indexing="ij") cy_grid = ops.reshape(cy_grid, (-1,)) cx_grid = ops.reshape(cx_grid, (-1,)) - shifts = ops.stack((cx_grid, cy_grid, cx_grid, cy_grid), axis=1) sizes = [ int( @@ -124,7 +117,6 @@ def call(self, inputs): ) for scale in range(self.num_scales) ] - base_anchors = self.generate_base_anchors( sizes=sizes, aspect_ratios=self.aspect_ratios ) @@ -133,10 +125,12 @@ def call(self, inputs): anchors = shifts + base_anchors anchors = ops.reshape(anchors, (-1, 4)) - multilevel_anchors[f"P{level}"] = convert_format( - anchors, - source="xyxy", - target=self.bounding_box_format, + multilevel_anchors[f"P{level}"] = ( + keras.utils.bounding_boxes.convert_format( + anchors, + source="xyxy", + target=self.bounding_box_format, + ) ) return multilevel_anchors @@ -145,10 +139,8 @@ def generate_base_anchors(self, sizes, aspect_ratios): aspect_ratios = ops.convert_to_tensor(aspect_ratios) h_ratios = ops.sqrt(aspect_ratios) w_ratios = 1 / h_ratios - ws = ops.reshape(w_ratios[:, None] * sizes[None, :], (-1,)) hs = ops.reshape(h_ratios[:, None] * sizes[None, :], (-1,)) - base_anchors = ops.stack([-1 * ws, -1 * hs, ws, hs], axis=1) / 2 base_anchors = ops.round(base_anchors) return base_anchors @@ -159,7 +151,6 @@ def compute_output_shape(self, input_shape): image_height, image_width = input_shape[1:-1] else: image_height, image_width = input_shape[:-1] - for i in range(self.min_level, self.max_level + 1): multilevel_boxes_shape[f"P{i}"] = ( int( diff --git a/keras_hub/src/models/retinanet/anchor_generator_test.py b/keras_hub/src/layers/modeling/anchor_generator_test.py similarity index 82% rename from keras_hub/src/models/retinanet/anchor_generator_test.py rename to keras_hub/src/layers/modeling/anchor_generator_test.py index 0b71630843..ccb6d4a90f 100644 --- a/keras_hub/src/models/retinanet/anchor_generator_test.py +++ b/keras_hub/src/layers/modeling/anchor_generator_test.py @@ -1,12 +1,19 @@ +import keras import numpy as np +import pytest from absl.testing import parameterized from keras import ops +from packaging import version -from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.tests.test_case import 
TestCase class AnchorGeneratorTest(TestCase): + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_layer_behaviors(self): images_shape = (8, 128, 128, 3) self.run_layer_test( @@ -47,6 +54,10 @@ def test_layer_behaviors(self): }, ), ) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_anchor_generator( self, bounding_box_format, diff --git a/keras_hub/src/models/retinanet/box_matcher.py b/keras_hub/src/layers/modeling/box_matcher.py similarity index 94% rename from keras_hub/src/models/retinanet/box_matcher.py rename to keras_hub/src/layers/modeling/box_matcher.py index dd8a486814..5f2df744f3 100644 --- a/keras_hub/src/models/retinanet/box_matcher.py +++ b/keras_hub/src/layers/modeling/box_matcher.py @@ -1,7 +1,11 @@ import keras from keras import ops +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.utils.tensor_utils import assert_bounding_box_support + +@keras_hub_export("keras_hub.layers.BoxMatcher") class BoxMatcher(keras.layers.Layer): """Box matching logic based on argmax of highest value (e.g., IOU). @@ -51,10 +55,16 @@ class BoxMatcher(keras.layers.Layer): Example: ```python - box_matcher = keras_cv.layers.BoxMatcher([0.3, 0.7], [-1, 0, 1]) - iou_metric = keras_cv.bounding_box.compute_iou(anchors, boxes) - matched_columns, matched_match_values = box_matcher(iou_metric) - cls_mask = ops.less_equal(matched_match_values, 0) + positive_threshold = 0.5 + negative_threshold = 0.4 + + matcher = keras_hub.layers.BoxMatcher( + thresholds=[negative_threshold, positive_threshold], + match_values=[-1, -2, 1], + ) + match_indices, matched_values = matcher(sim_matrix) + positive_mask = ops.equal(matched_vals, 1) + ignore_mask = ops.equal(matched_vals, -2) ``` """ @@ -66,6 +76,9 @@ def __init__( force_match_for_each_col=False, **kwargs, ): + # Check whether current version of keras support bounding box utils + assert_bounding_box_support(self.__class__.__name__) + super().__init__(**kwargs) if sorted(thresholds) != thresholds: raise ValueError(f"`threshold` must be sorted, got {thresholds}") diff --git a/keras_hub/src/models/retinanet/box_matcher_test.py b/keras_hub/src/layers/modeling/box_matcher_test.py similarity index 78% rename from keras_hub/src/models/retinanet/box_matcher_test.py rename to keras_hub/src/layers/modeling/box_matcher_test.py index d991f90e5b..68550c7ebc 100644 --- a/keras_hub/src/models/retinanet/box_matcher_test.py +++ b/keras_hub/src/layers/modeling/box_matcher_test.py @@ -1,11 +1,18 @@ +import keras import numpy as np +import pytest from keras import ops +from packaging import version -from keras_hub.src.models.retinanet.box_matcher import BoxMatcher +from keras_hub.src.layers.modeling.box_matcher import BoxMatcher from keras_hub.src.tests.test_case import TestCase class BoxMatcherTest(TestCase): + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_box_matcher_invalid_length(self): fg_threshold = 0.5 bg_thresh_hi = 0.2 @@ -17,6 +24,10 @@ def test_box_matcher_invalid_length(self): match_values=[-3, -2, -1], ) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_box_matcher_unsorted_thresholds(self): fg_threshold = 0.5 bg_thresh_hi 
= 0.2 @@ -28,6 +39,10 @@ def test_box_matcher_unsorted_thresholds(self): match_values=[-3, -2, -1, 1], ) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_box_matcher_unbatched(self): sim_matrix = np.array([[0.04, 0, 0, 0], [0, 0, 1.0, 0]]) @@ -48,6 +63,10 @@ def test_box_matcher_unbatched(self): self.assertAllEqual(match_indices, [0, 2]) self.assertAllEqual(matched_values, [-2, 1]) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_box_matcher_batched(self): sim_matrix = np.array([[[0.04, 0, 0, 0], [0, 0, 1.0, 0]]]) @@ -68,6 +87,10 @@ def test_box_matcher_batched(self): self.assertAllEqual(match_indices, [[0, 2]]) self.assertAllEqual(matched_values, [[-2, 1]]) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_box_matcher_force_match(self): sim_matrix = np.array( [[0, 0.04, 0, 0.1], [0, 0, 1.0, 0], [0.1, 0, 0, 0], [0, 0, 0, 0.6]], @@ -93,6 +116,10 @@ def test_box_matcher_force_match(self): self.assertAllEqual(match_indices, [1, 2, 0, 3]) self.assertAllEqual(matched_values, [1, 1, 1, 1]) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_box_matcher_empty_gt_boxes(self): sim_matrix = np.array([[], []]) diff --git a/keras_hub/src/models/retinanet/non_max_supression.py b/keras_hub/src/layers/modeling/non_max_supression.py similarity index 85% rename from keras_hub/src/models/retinanet/non_max_supression.py rename to keras_hub/src/layers/modeling/non_max_supression.py index 5ca52b4dfc..d58f92b86a 100644 --- a/keras_hub/src/models/retinanet/non_max_supression.py +++ b/keras_hub/src/layers/modeling/non_max_supression.py @@ -3,32 +3,52 @@ import keras from keras import ops -# TODO: https://github.com/keras-team/keras-hub/issues/1965 -from keras_hub.src.bounding_box import converters -from keras_hub.src.bounding_box import utils -from keras_hub.src.bounding_box import validate_format +from keras_hub.src.api_export import keras_hub_export +from keras_hub.src.utils.tensor_utils import assert_bounding_box_support EPSILON = 1e-8 +@keras_hub_export("keras_hub.layers.NonMaxSuppression") class NonMaxSuppression(keras.layers.Layer): """A Keras layer that decodes predictions of an object detection model. Args: - bounding_box_format: The format of bounding boxes of input dataset. - Refer - TODO: link keras core bounding box docs - for more details on supported bounding box formats. + bounding_box_format: str. The format of bounding boxes of input dataset. + Refer `keras.utils.bounding_boxes.convert_format` args for more + details on supported bounding box formats. from_logits: boolean, True means input score is logits, False means confidence. - iou_threshold: a float value in the range [0, 1] representing the + iou_threshold: float. Value in the range [0, 1] representing the minimum IoU threshold for two boxes to be considered same for suppression. Defaults to 0.5. - confidence_threshold: a float value in the range [0, 1]. All boxes with + confidence_threshold: float. Value in the range [0, 1]. All boxes with confidence below this value will be discarded, defaults to 0.5. - max_detections: the maximum detections to consider after nms is applied. 
- A large number may trigger significant memory overhead, + max_detections: int. the maximum detections to consider after nms is + applied. A large number may trigger significant memory overhead, defaults to 100. + + Example: + ```python + boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4)) + classes = np.expand_dims( + np.array( + [[0.1, 0.1, 0.4, 0.5, 0.9], [0.7, 0.5, 0.3, 0.0, 0.0]], + "float32", + ), + axis=-1, + ) + + nms = keras_hub.layers.NonMaxSuppression( + bounding_box_format="yxyx", + from_logits=False, + iou_threshold=1.0, + confidence_threshold=0.1, + max_detections=1, + ) + + nms_outputs = nms(boxes, classes) + ``` """ def __init__( @@ -40,6 +60,8 @@ def __init__( max_detections=100, **kwargs, ): + # Check whether current version of keras support bounding box utils + assert_bounding_box_support(self.__class__.__name__) super().__init__(**kwargs) self.bounding_box_format = bounding_box_format self.from_logits = from_logits @@ -49,7 +71,10 @@ def __init__( self.built = True def call( - self, box_prediction, class_prediction, images=None, image_shape=None + self, + box_prediction, + class_prediction, + images=None, ): """Accepts images and raw scores, returning bounding box predictions. @@ -59,15 +84,24 @@ def call( class_prediction: Dense Tensor of shape [batch, boxes, num_classes]. """ target_format = "yxyx" - if utils.is_relative(self.bounding_box_format): - target_format = utils.as_relative(target_format) + height, width = None, None + + if "rel" in self.bounding_box_format and images is None: + raise ValueError( + "`images` cannot be None when using relative " + "bounding box format." + ) - box_prediction = converters.convert_format( + if "rel" in self.bounding_box_format: + target_format = "rel_" + target_format + height, width, _ = ops.shape(images) + + box_prediction = keras.utils.bounding_boxes.convert_format( box_prediction, source=self.bounding_box_format, target=target_format, - images=images, - image_shape=image_shape, + height=height, + width=width, ) if self.from_logits: class_prediction = ops.sigmoid(class_prediction) @@ -95,17 +129,17 @@ def call( class_prediction, ops.expand_dims(idx, axis=-1), axis=1 ) - box_prediction = converters.convert_format( + box_prediction = keras.utils.bounding_boxes.convert_format( box_prediction, source=target_format, target=self.bounding_box_format, - images=images, - image_shape=image_shape, + height=height, + width=width, ) bounding_boxes = { "boxes": box_prediction, "confidence": confidence_prediction, - "classes": ops.argmax(class_prediction, axis=-1), + "labels": ops.argmax(class_prediction, axis=-1), "num_detections": valid_det, } @@ -519,22 +553,40 @@ def mask_invalid_detections(bounding_boxes): returned value will also return `tf.RaggedTensor` representations. """ # ensure we are complying with Keras bounding box format. - info = validate_format.validate_format(bounding_boxes) - if info["ragged"]: + if ( + not isinstance(bounding_boxes, dict) + or "labels" not in bounding_boxes + or "boxes" not in bounding_boxes + ): raise ValueError( - "`bounding_box.mask_invalid_detections()` requires inputs to be " - "Dense tensors. Please call " - "`bounding_box.to_dense(bounding_boxes)` before passing your boxes " - "to `bounding_box.mask_invalid_detections()`." + "Expected `bounding_boxes` agurment to be a " + "dict with keys 'boxes' and 'labels'. 
Received: " + f"bounding_boxes={bounding_boxes}" ) + if "num_detections" not in bounding_boxes: raise ValueError( "`bounding_boxes` must have key 'num_detections' " - "to be used with `bounding_box.mask_invalid_detections()`." + "to be used with `mask_invalid_detections()`." ) boxes = bounding_boxes.get("boxes") - classes = bounding_boxes.get("classes") + labels = bounding_boxes.get("labels") + if isinstance(boxes, list): + if not isinstance(labels, list): + raise ValueError( + "If `bounding_boxes['boxes']` is a list, then " + "`bounding_boxes['labels']` must also be a list." + f"Received: bounding_boxes['labels']={labels}" + ) + if len(boxes) != len(labels): + raise ValueError( + "If `bounding_boxes['boxes']` and " + "`bounding_boxes['labels']` are both lists, " + "they must have the same length. Received: " + f"len(bounding_boxes['boxes'])={len(boxes)} and " + f"len(bounding_boxes['labels'])={len(labels)} and " + ) confidence = bounding_boxes.get("confidence", None) num_detections = bounding_boxes.get("num_detections") @@ -545,7 +597,7 @@ def mask_invalid_detections(bounding_boxes): ) mask = mask < num_detections[:, None] - classes = ops.where(mask, classes, -ops.ones_like(classes)) + labels = ops.where(mask, labels, -ops.ones_like(labels)) if confidence is not None: confidence = ops.where(mask, confidence, -ops.ones_like(confidence)) @@ -558,7 +610,7 @@ def mask_invalid_detections(bounding_boxes): result = bounding_boxes.copy() result["boxes"] = boxes - result["classes"] = classes + result["labels"] = labels if confidence is not None: result["confidence"] = confidence diff --git a/keras_hub/src/models/retinanet/non_max_supression_test.py b/keras_hub/src/layers/modeling/non_max_supression_test.py similarity index 72% rename from keras_hub/src/models/retinanet/non_max_supression_test.py rename to keras_hub/src/layers/modeling/non_max_supression_test.py index 94d3c3f124..b3cd634e98 100644 --- a/keras_hub/src/models/retinanet/non_max_supression_test.py +++ b/keras_hub/src/layers/modeling/non_max_supression_test.py @@ -1,11 +1,18 @@ +import keras import numpy as np +import pytest from keras import ops +from packaging import version -from keras_hub.src.models.retinanet.non_max_supression import NonMaxSuppression +from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression from keras_hub.src.tests.test_case import TestCase class NonMaxSupressionTest(TestCase): + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_confidence_threshold(self): boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4)) classes = ops.expand_dims( @@ -29,9 +36,13 @@ def test_confidence_threshold(self): self.assertAllClose( outputs["boxes"], [boxes[0][-2:, ...], boxes[1][:2, ...]] ) - self.assertAllClose(outputs["classes"], [[0.0, 0.0], [0.0, 0.0]]) + self.assertAllClose(outputs["labels"], [[0.0, 0.0], [0.0, 0.0]]) self.assertAllClose(outputs["confidence"], [[0.9, 0.5], [0.7, 0.5]]) + @pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", + ) def test_max_detections(self): boxes = np.random.uniform(low=0, high=1, size=(2, 5, 4)) classes = ops.expand_dims( @@ -55,5 +66,5 @@ def test_max_detections(self): self.assertAllClose( outputs["boxes"], [boxes[0][-1:, ...], boxes[1][:1, ...]] ) - self.assertAllClose(outputs["classes"], [[0.0], [0.0]]) + self.assertAllClose(outputs["labels"], [[0.0], [0.0]]) 
self.assertAllClose(outputs["confidence"], [[0.9], [0.7]]) diff --git a/keras_hub/src/models/image_object_detector.py b/keras_hub/src/models/object_detector.py similarity index 87% rename from keras_hub/src/models/image_object_detector.py rename to keras_hub/src/models/object_detector.py index f8eba20dc9..e248ced22c 100644 --- a/keras_hub/src/models/image_object_detector.py +++ b/keras_hub/src/models/object_detector.py @@ -4,20 +4,20 @@ from keras_hub.src.models.task import Task -@keras_hub_export("keras_hub.models.ImageObjectDetector") -class ImageObjectDetector(Task): +@keras_hub_export("keras_hub.models.ObjectDetector") +class ObjectDetector(Task): """Base class for all image object detection tasks. - The `ImageObjectDetector` tasks wrap a `keras_hub.models.Backbone` and + The `ObjectDetector` tasks wrap a `keras_hub.models.Backbone` and a `keras_hub.models.Preprocessor` to create a model that can be used for - object detection. `ImageObjectDetector` tasks take an additional + object detection. `ObjectDetector` tasks take an additional `num_classes` argument, controlling the number of predicted output classes. To fine-tune with `fit()`, pass a dataset containing tuples of `(x, y)` labels where `x` is a string and `y` is dictionary with `boxes` and `classes`. - All `ImageObjectDetector` tasks include a `from_preset()` constructor which + All `ObjectDetector` tasks include a `from_preset()` constructor which can be used to load a pre-trained config and weights. """ @@ -29,9 +29,9 @@ def compile( metrics=None, **kwargs, ): - """Configures the `ImageObjectDetector` task for training. + """Configures the `ObjectDetector` task for training. - The `ImageObjectDetector` task extends the default compilation signature + The `ObjectDetector` task extends the default compilation signature of `keras.Model.compile` with defaults for `optimizer`, `loss`, and `metrics`. To override these defaults, pass any value to these arguments during compilation. diff --git a/keras_hub/src/models/image_object_detector_preprocessor.py b/keras_hub/src/models/object_detector_preprocessor.py similarity index 67% rename from keras_hub/src/models/image_object_detector_preprocessor.py rename to keras_hub/src/models/object_detector_preprocessor.py index 2f89d216ef..439ddabf01 100644 --- a/keras_hub/src/models/image_object_detector_preprocessor.py +++ b/keras_hub/src/models/object_detector_preprocessor.py @@ -5,20 +5,20 @@ from keras_hub.src.utils.tensor_utils import preprocessing_function -@keras_hub_export("keras_hub.models.ImageObjectDetectorPreprocessor") -class ImageObjectDetectorPreprocessor(Preprocessor): +@keras_hub_export("keras_hub.models.ObjectDetectorPreprocessor") +class ObjectDetectorPreprocessor(Preprocessor): """Base class for object detector preprocessing layers. - `ImageObjectDetectorPreprocessor` tasks wraps a + `ObjectDetectorPreprocessor` tasks wraps a `keras_hub.layers.Preprocessor` to create a preprocessing layer for object detection tasks. It is intended to be paired with a `keras_hub.models.ImageObjectDetector` task. - All `ImageObjectDetectorPreprocessor` take three inputs, `x`, `y`, and + All `ObjectDetectorPreprocessor` take three inputs, `x`, `y`, and `sample_weight`. `x`, the first input, should always be included. It can be a image or batch of images. See examples below. `y` and `sample_weight` - are optional inputs that will be passed through unaltered. 
Usually, `y` will - be the a dict of `{"boxes": Tensor(batch_size, num_boxes, 4), + are optional inputs that will be passed through unaltered. Usually, `y` + willbe the a dict of `{"boxes": Tensor(batch_size, num_boxes, 4), "classes": (batch_size, num_boxes)}. The layer will returns either `x`, an `(x, y)` tuple if labels were @@ -26,18 +26,18 @@ class ImageObjectDetectorPreprocessor(Preprocessor): were provided. `x` will be the input images after all model preprocessing has been applied. - All `ImageObjectDetectorPreprocessor` tasks include a `from_preset()` - constructor which can be used to load a pre-trained config and vocabularies. - You can call the `from_preset()` constructor directly on this base class, in - which case the correct class for your model will be automatically - instantiated. + All `ObjectDetectorPreprocessor` tasks include a `from_preset()` + constructor which can be used to load a pre-trained config and + vocabularies. You can call the `from_preset()` constructor directly on + this base class, in which case the correct class for your model will be + automatically instantiated. Args: image_converter: Preprocessing pipeline for images. Examples. ```python - preprocessor = keras_hub.models.ImageObjectDetectorPreprocessor.from_preset( + preprocessor = keras_hub.models.ObjectDetectorPreprocessor.from_preset( "retinanet_resnet50", ) """ @@ -53,5 +53,5 @@ def __init__( @preprocessing_function def call(self, x, y=None, sample_weight=None): if self.image_converter: - x = self.image_converter(x) + x, y, sample_weight = self.image_converter(x, y, sample_weight) return keras.utils.pack_x_y_sample_weight(x, y, sample_weight) diff --git a/keras_hub/src/models/retinanet/retinanet_image_converter.py b/keras_hub/src/models/retinanet/retinanet_image_converter.py index 6d26323a0a..fb25b42eff 100644 --- a/keras_hub/src/models/retinanet/retinanet_image_converter.py +++ b/keras_hub/src/models/retinanet/retinanet_image_converter.py @@ -1,3 +1,5 @@ +import keras + from keras_hub.src.api_export import keras_hub_export from keras_hub.src.layers.preprocessing.image_converter import ImageConverter from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone @@ -15,20 +17,37 @@ def __init__( offset=None, norm_mean=[0.485, 0.456, 0.406], norm_std=[0.229, 0.224, 0.225], + bounding_box_format="yxyx", **kwargs, ): - super().__init__(**kwargs) + super().__init__(image_size=image_size, **kwargs) + self.resizing_bbox = keras.layers.Resizing( + height=image_size[0] if image_size else None, + width=image_size[1] if image_size else None, + bounding_box_format=bounding_box_format, + crop_to_aspect_ratio=self.crop_to_aspect_ratio, + pad_to_aspect_ratio=self.pad_to_aspect_ratio, + interpolation=self.interpolation, + data_format=self.data_format, + dtype=self.dtype_policy, + name="resizing_bbox", + ) + self.image_size = image_size self.scale = scale self.offset = offset self.norm_mean = norm_mean self.norm_std = norm_std - self.built = True + self.bounding_box_format = bounding_box_format @preprocessing_function - def call(self, inputs): - # TODO: https://github.com/keras-team/keras-hub/issues/1965 - x = inputs + def call(self, x, y=None, sample_weight=None): + if y is not None: + inputs = self.resizing_bbox({"images": x, "bounding_boxes": y}) + x = inputs["images"] + y = inputs["bounding_boxes"] + else: + x = self.resizing(x) # Use while prediction pipeline # Rescaling Image if self.scale is not None: x = x * self._expand_non_channel_dims(self.scale, x) @@ -40,7 +59,7 @@ def call(self, 
inputs): if self.norm_std: x = x / self._expand_non_channel_dims(self.norm_std, x) - return x + return x, y, sample_weight def get_config(self): config = super().get_config() @@ -48,6 +67,7 @@ def get_config(self): { "norm_mean": self.norm_mean, "norm_std": self.norm_std, + "bounding_box_format": self.bounding_box_format, } ) return config diff --git a/keras_hub/src/models/retinanet/retinanet_label_encoder.py b/keras_hub/src/models/retinanet/retinanet_label_encoder.py index 66a6ff6d78..886d86422d 100644 --- a/keras_hub/src/models/retinanet/retinanet_label_encoder.py +++ b/keras_hub/src/models/retinanet/retinanet_label_encoder.py @@ -4,10 +4,7 @@ from keras import ops # TODO: https://github.com/keras-team/keras-hub/issues/1965 -from keras_hub.src.bounding_box.converters import convert_format -from keras_hub.src.bounding_box.converters import encode_box_to_deltas -from keras_hub.src.bounding_box.iou import compute_iou -from keras_hub.src.models.retinanet.box_matcher import BoxMatcher +from keras_hub.src.layers.modeling.box_matcher import BoxMatcher from keras_hub.src.utils import tensor_utils @@ -113,7 +110,7 @@ def call(self, images, gt_boxes, gt_classes): "support unbatched inputs for the `images` argument. " f"Received `shape(images)={images_shape}`." ) - image_shape = images_shape[1:] + height, width, _ = images_shape[1:] if len(ops.shape(gt_classes)) == 2: gt_classes = ops.expand_dims(gt_classes, axis=-1) @@ -122,14 +119,14 @@ def call(self, images, gt_boxes, gt_classes): anchor_boxes = ops.concatenate(list(anchor_boxes.values()), axis=0) box_targets, class_targets = self._encode_sample( - gt_boxes, gt_classes, anchor_boxes, image_shape + gt_boxes, gt_classes, anchor_boxes, height, width ) box_targets = ops.reshape( box_targets, (-1, ops.shape(box_targets)[1], 4) ) return box_targets, class_targets - def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, image_shape): + def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, height, width): """Creates box and classification targets for a batched sample. Matches ground truth boxes to anchor boxes based on IOU. @@ -149,23 +146,25 @@ def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, image_shape): anchor_boxes: A Tensor with the shape `[total_anchors, 4]` representing all the anchor boxes for a given input image shape, where each anchor box is of the format `[x, y, width, height]`. - image_shape: Tuple indicating the image shape `[H, W, C]`. + height: int. + width: int. Returns: Encoded bounding boxes in the format of `center_yxwh` and corresponding labels for each encoded bounding box. 
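For context on the signature change, a condensed sketch of the target-assignment calls as they now route through `keras.utils.bounding_boxes` with explicit `height`/`width`; box values and shapes are illustrative and assume Keras >= 3.8.

```python
import numpy as np
import keras

height, width = 256, 256

# One row per anchor, "yxyx" format; values are illustrative.
anchor_boxes = np.array(
    [[0.0, 0.0, 32.0, 32.0], [32.0, 32.0, 96.0, 96.0]], "float32"
)
gt_boxes = np.array([[30.0, 30.0, 100.0, 100.0]], "float32")

# IoU matrix between anchors and ground-truth boxes, consumed by BoxMatcher.
iou = keras.utils.bounding_boxes.compute_iou(
    anchor_boxes,
    gt_boxes,
    bounding_box_format="yxyx",
    image_shape=(height, width, 3),
)

# Regression targets: deltas from each anchor to its matched ground-truth box.
matched_gt_boxes = np.array(
    [[30.0, 30.0, 100.0, 100.0], [30.0, 30.0, 100.0, 100.0]], "float32"
)
box_targets = keras.utils.bounding_boxes.encode_box_to_deltas(
    anchors=anchor_boxes,
    boxes=matched_gt_boxes,
    anchor_format="yxyx",
    box_format="yxyx",
    encoding_format="center_yxhw",
    image_shape=(height, width, 3),
)
```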
""" - anchor_boxes = convert_format( + anchor_boxes = keras.utils.bounding_boxes.convert_format( anchor_boxes, source=self.anchor_generator.bounding_box_format, target=self.bounding_box_format, - image_shape=image_shape, + height=height, + width=width, ) - iou_matrix = compute_iou( + iou_matrix = keras.utils.bounding_boxes.compute_iou( anchor_boxes, gt_boxes, bounding_box_format=self.bounding_box_format, - image_shape=image_shape, + image_shape=(height, width, 3), ) matched_gt_idx, matched_vals = self.box_matcher(iou_matrix) @@ -179,14 +178,14 @@ def _encode_sample(self, gt_boxes, gt_classes, anchor_boxes, image_shape): matched_gt_boxes, (-1, ops.shape(matched_gt_boxes)[1], 4) ) - box_targets = encode_box_to_deltas( + box_targets = keras.utils.bounding_boxes.encode_box_to_deltas( anchors=anchor_boxes, boxes=matched_gt_boxes, anchor_format=self.bounding_box_format, box_format=self.bounding_box_format, encoding_format=self.encoding_format, variance=self.box_variance, - image_shape=image_shape, + image_shape=(height, width, 3), ) matched_gt_cls_ids = tensor_utils.target_gather( diff --git a/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py b/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py index d05bf5a99a..db7818e449 100644 --- a/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py +++ b/keras_hub/src/models/retinanet/retinanet_label_encoder_test.py @@ -1,13 +1,20 @@ +import keras import numpy as np +import pytest from keras import ops +from packaging import version -from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.models.retinanet.retinanet_label_encoder import ( RetinaNetLabelEncoder, ) from keras_hub.src.tests.test_case import TestCase +@pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", +) class RetinaNetLabelEncoderTest(TestCase): def setUp(self): anchor_generator = AnchorGenerator( diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector.py b/keras_hub/src/models/retinanet/retinanet_object_detector.py index 14b3a631c5..efc4983512 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector.py @@ -2,13 +2,9 @@ from keras import ops from keras_hub.src.api_export import keras_hub_export - -# TODO: https://github.com/keras-team/keras-hub/issues/1965 -from keras_hub.src.bounding_box.converters import convert_format -from keras_hub.src.bounding_box.converters import decode_deltas_to_boxes -from keras_hub.src.models.image_object_detector import ImageObjectDetector -from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator -from keras_hub.src.models.retinanet.non_max_supression import NonMaxSuppression +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator +from keras_hub.src.layers.modeling.non_max_supression import NonMaxSuppression +from keras_hub.src.models.object_detector import ObjectDetector from keras_hub.src.models.retinanet.prediction_head import PredictionHead from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_label_encoder import ( @@ -17,10 +13,11 @@ from keras_hub.src.models.retinanet.retinanet_object_detector_preprocessor import ( # noqa: E501 RetinaNetObjectDetectorPreprocessor, ) +from keras_hub.src.utils.tensor_utils import 
assert_bounding_box_support @keras_hub_export("keras_hub.models.RetinaNetObjectDetector") -class RetinaNetObjectDetector(ImageObjectDetector): +class RetinaNetObjectDetector(ObjectDetector): """RetinaNet object detector model. This class implements the RetinaNet object detection architecture. @@ -107,6 +104,9 @@ def __init__( prediction_decoder=None, **kwargs, ): + # Check whether current version of keras support bounding box utils + assert_bounding_box_support(self.__class__.__name__) + # === Layers === image_input = keras.layers.Input(backbone.image_shape, name="images") head_dtype = dtype or backbone.dtype_policy @@ -204,17 +204,19 @@ def __init__( ) def compute_loss(self, x, y, y_pred, sample_weight, **kwargs): - y_for_label_encoder = convert_format( + _, height, width, _ = keras.ops.shape(x) + y_for_label_encoder = keras.utils.bounding_boxes.convert_format( y, source=self.bounding_box_format, target=self.label_encoder.bounding_box_format, - images=x, + height=height, + width=width, ) - boxes, classes = self.label_encoder( + boxes, labels = self.label_encoder( images=x, gt_boxes=y_for_label_encoder["boxes"], - gt_classes=y_for_label_encoder["classes"], + gt_classes=y_for_label_encoder["labels"], ) box_pred = y_pred["bbox_regression"] @@ -242,11 +244,11 @@ def compute_loss(self, x, y, y_pred, sample_weight, **kwargs): ) cls_labels = ops.one_hot( - ops.cast(classes, "int32"), self.num_classes, dtype="float32" + ops.cast(labels, "int32"), self.num_classes, dtype="float32" ) - positive_mask = ops.cast(ops.greater(classes, -1.0), dtype="float32") + positive_mask = ops.cast(ops.greater(labels, -1.0), dtype="float32") normalizer = ops.sum(positive_mask) - cls_weights = ops.cast(ops.not_equal(classes, -2.0), dtype="float32") + cls_weights = ops.cast(ops.not_equal(labels, -2.0), dtype="float32") cls_weights /= normalizer box_weights = positive_mask / normalizer @@ -306,32 +308,32 @@ def decode_predictions(self, predictions, data): images, _ = data else: images = data - image_shape = ops.shape(images)[1:] + height, width, channels = ops.shape(images)[1:] anchor_boxes = self.anchor_generator(images) anchor_boxes = ops.concatenate(list(anchor_boxes.values()), axis=0) - box_pred = decode_deltas_to_boxes( + box_pred = keras.utils.bounding_boxes.decode_deltas_to_boxes( anchors=anchor_boxes, boxes_delta=box_pred, encoded_format="center_xywh", anchor_format=self.anchor_generator.bounding_box_format, box_format=self.bounding_box_format, - image_shape=image_shape, + image_shape=(height, width, channels), ) # box_pred is now in "self.bounding_box_format" format - box_pred = convert_format( + box_pred = keras.utils.bounding_boxes.convert_format( box_pred, source=self.bounding_box_format, target=self.prediction_decoder.bounding_box_format, - image_shape=image_shape, - ) - y_pred = self.prediction_decoder( - box_pred, cls_pred, image_shape=image_shape + height=height, + width=width, ) - y_pred["boxes"] = convert_format( + y_pred = self.prediction_decoder(box_pred, cls_pred, images=images) + y_pred["boxes"] = keras.utils.bounding_boxes.convert_format( y_pred["boxes"], source=self.prediction_decoder.bounding_box_format, target=self.bounding_box_format, - image_shape=image_shape, + height=height, + width=width, ) return y_pred diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py b/keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py index 8bc6d1f796..caa46be1d3 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py +++ 
b/keras_hub/src/models/retinanet/retinanet_object_detector_preprocessor.py @@ -1,6 +1,6 @@ from keras_hub.src.api_export import keras_hub_export -from keras_hub.src.models.image_object_detector_preprocessor import ( - ImageObjectDetectorPreprocessor, +from keras_hub.src.models.object_detector_preprocessor import ( + ObjectDetectorPreprocessor, ) from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_image_converter import ( @@ -9,6 +9,6 @@ @keras_hub_export("keras_hub.models.RetinaNetObjectDetectorPreprocessor") -class RetinaNetObjectDetectorPreprocessor(ImageObjectDetectorPreprocessor): +class RetinaNetObjectDetectorPreprocessor(ObjectDetectorPreprocessor): backbone_cls = RetinaNetBackbone image_converter_cls = RetinaNetImageConverter diff --git a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py index 53d7461bb1..5e01c802a5 100644 --- a/keras_hub/src/models/retinanet/retinanet_object_detector_test.py +++ b/keras_hub/src/models/retinanet/retinanet_object_detector_test.py @@ -1,8 +1,10 @@ +import keras import numpy as np import pytest +from packaging import version +from keras_hub.src.layers.modeling.anchor_generator import AnchorGenerator from keras_hub.src.models.resnet.resnet_backbone import ResNetBackbone -from keras_hub.src.models.retinanet.anchor_generator import AnchorGenerator from keras_hub.src.models.retinanet.retinanet_backbone import RetinaNetBackbone from keras_hub.src.models.retinanet.retinanet_image_converter import ( RetinaNetImageConverter, @@ -19,6 +21,10 @@ from keras_hub.src.tests.test_case import TestCase +@pytest.mark.skipif( + version.parse(keras.__version__) < version.parse("3.8.0"), + reason="Bbox utils are not supported before keras < 3.8.0", +) class RetinaNetObjectDetectorTest(TestCase): def setUp(self): resnet_kwargs = { @@ -53,7 +59,9 @@ def setUp(self): bounding_box_format="yxyx", anchor_generator=anchor_generator ) - image_converter = RetinaNetImageConverter(scale=1 / 255.0) + image_converter = RetinaNetImageConverter( + bounding_box_format="yxyx", scale=1 / 255.0, image_size=(800, 800) + ) preprocessor = RetinaNetObjectDetectorPreprocessor( image_converter=image_converter @@ -76,7 +84,7 @@ def setUp(self): "boxes": np.array( [[[20.0, 10.0, 12.0, 11.0], [30.0, 20.0, 40.0, 12.0]]] ), - "classes": np.array([[0, 2]]), + "labels": np.array([[0, 2]]), } self.train_data = (self.images, self.labels) @@ -87,7 +95,7 @@ def test_detection_basics(self): train_data=self.train_data, expected_output_shape={ "boxes": (1, 100, 4), - "classes": (1, 100), + "labels": (1, 100), "confidence": (1, 100), "num_detections": (1,), }, diff --git a/keras_hub/src/utils/tensor_utils.py b/keras_hub/src/utils/tensor_utils.py index 19eb6975d7..29f9ed8414 100644 --- a/keras_hub/src/utils/tensor_utils.py +++ b/keras_hub/src/utils/tensor_utils.py @@ -1,11 +1,13 @@ import contextlib import functools import inspect +import re import threading import keras import numpy as np from keras import ops +from packaging import version try: import tensorflow as tf @@ -262,6 +264,15 @@ def assert_tf_libs_installed(symbol_name): ) +def assert_bounding_box_support(symbol_name): + keras_version = re.sub(r".dev.*", "", keras.__version__) + if version.parse(keras_version) < version.parse("3.8.0"): + raise ImportError( + f"{symbol_name} requires Keras version to be 3.8.0 or higher. 
" + f"Current keras version: {keras.__version__}" + ) + + def assert_tf_backend(symbol_name): if keras.config.backend() != "tensorflow": raise RuntimeError(