Skip to content

Commit 75b6a4f

Browse files
authored
Hotfix/alg 1470 drop boxes padding (#1107)
* fix: Remove targets padding and un-padding from all transforms and collate
* fix: Update all usage of transforms and the collate
* fix: Update PPYolo collate
* feature: Deprecation warning for max_targets
* fix: Test bug
* refactor: Use super
* feature: DatasetItemsException
1 parent 83ce129 commit 75b6a4f

14 files changed

+121
-124
lines changed

documentation/source/ObjectDetection.md

+2-5
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,11 @@ MY_CLASSES = ['cat', 'dog', 'donut']
259259
@register_dataset("MyNewDetectionDataset")
260260
class MyNewDetectionDataset(DetectionDataset):
261261
def __init__(self, data_dir: str, samples_dir: str, targets_dir: str, input_dim: Tuple[int, int],
262-
transforms: List[DetectionTransform], max_targets: int = 100, max_num_samples: int = None,
262+
transforms: List[DetectionTransform], max_num_samples: int = None,
263263
class_inclusion_list: Optional[List[str]] = None, **kwargs):
264264
self.sample_paths = None
265265
self.samples_sub_directory = samples_dir
266266
self.targets_sub_directory = targets_dir
267-
self.max_targets = max_targets
268267
269268
# setting cache as False to be able to load non-resized images and crop in one of transforms
270269
super().__init__(data_dir=data_dir, input_dim=input_dim,
@@ -310,9 +309,7 @@ class MyNewDetectionDataset(DetectionDataset):
310309
lines = targets_file.read().splitlines()
311310
target = np.array([x.strip().strip(',').split(',') for x in lines], dtype=np.float32)
312311
313-
res_target = np.zeros((self.max_targets, 5)) # cls, cx, cy, w, h
314-
if len(target) != 0:
315-
res_target[:len(target)] = target
312+
res_target = np.array(target) if len(target) != 0 else np.zeros((0, 5)) # cls, cx, cy, w, h
316313
annotation = {
317314
'img_path': os.path.join(self.data_dir, sample_path),
318315
'target': res_target,

src/super_gradients/recipes/dataset_params/coco_detection_dataset_params.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ train_dataset_params:
3333
prob: 0.5 # probability to apply horizontal flip
3434
- DetectionPaddedRescale:
3535
input_dim: ${dataset_params.train_dataset_params.input_dim}
36-
max_targets: 120
3736
- DetectionTargetsFormatTransform:
3837
input_dim: ${dataset_params.train_dataset_params.input_dim}
3938
output_format: LABEL_CXCYWH
@@ -65,7 +64,6 @@ val_dataset_params:
6564
- DetectionPaddedRescale:
6665
input_dim: ${dataset_params.val_dataset_params.input_dim}
6766
- DetectionTargetsFormatTransform:
68-
max_targets: 50
6967
input_dim: ${dataset_params.val_dataset_params.input_dim}
7068
output_format: LABEL_CXCYWH
7169
tight_box_rotation: False

src/super_gradients/recipes/dataset_params/coco_detection_ppyoloe_dataset_params.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ train_dataset_params:
3636
mean: [ 123.675, 116.28, 103.53 ]
3737
std: [ 58.395, 57.12, 57.375 ]
3838
- DetectionTargetsFormatTransform:
39-
max_targets: 256
4039
output_format: LABEL_CXCYWH
4140

4241
tight_box_rotation: False
@@ -78,7 +77,6 @@ val_dataset_params:
7877
mean: [ 123.675, 116.28, 103.53 ]
7978
std: [ 58.395, 57.12, 57.375 ]
8079
- DetectionTargetsFormatTransform:
81-
max_targets: 256
8280
output_format: LABEL_CXCYWH
8381
tight_box_rotation: False
8482
class_inclusion_list:

src/super_gradients/recipes/dataset_params/coco_detection_ssd_lite_mobilenet_v2_dataset_params.yaml

-3
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,8 @@ train_dataset_params:
2828
prob: 0.5 # probability to apply horizontal flip
2929
- DetectionPaddedRescale:
3030
input_dim: ${dataset_params.train_dataset_params.input_dim}
31-
max_targets: 120
3231
- DetectionTargetsFormatTransform:
3332
input_dim: ${dataset_params.train_dataset_params.input_dim}
34-
max_targets: 50
3533
output_format: LABEL_NORMALIZED_CXCYWH
3634

3735
tight_box_rotation: False
@@ -63,7 +61,6 @@ val_dataset_params:
6361
input_dim: ${dataset_params.val_dataset_params.input_dim}
6462
- DetectionTargetsFormatTransform:
6563
input_dim: ${dataset_params.val_dataset_params.input_dim}
66-
max_targets: 50
6764
output_format: LABEL_NORMALIZED_CXCYWH
6865
tight_box_rotation: False
6966
class_inclusion_list:

src/super_gradients/recipes/dataset_params/coco_detection_yolo_format_base_dataset_params.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ train_dataset_params:
4141
prob: 0.5 # probability to apply horizontal flip
4242
- DetectionPaddedRescale:
4343
input_dim: ${dataset_params.train_dataset_params.input_dim}
44-
max_targets: 120
4544
- DetectionTargetsFormatTransform:
4645
input_dim: ${dataset_params.train_dataset_params.input_dim}
4746
output_format: LABEL_CXCYWH
@@ -75,7 +74,6 @@ val_dataset_params:
7574
- DetectionPaddedRescale:
7675
input_dim: ${dataset_params.val_dataset_params.input_dim}
7776
- DetectionTargetsFormatTransform:
78-
max_targets: 50
7977
input_dim: ${dataset_params.val_dataset_params.input_dim}
8078
output_format: LABEL_CXCYWH
8179
class_inclusion_list:

src/super_gradients/recipes/dataset_params/coco_detection_yolo_nas_dataset_params.yaml

-3
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,10 @@ train_dataset_params:
3232
flip_prob: 0.5 # probability to apply horizontal flip
3333
- DetectionPaddedRescale:
3434
input_dim: ${dataset_params.train_dataset_params.input_dim}
35-
max_targets: 120
3635
pad_value: 114
3736
- DetectionStandardize:
3837
max_value: 255.
3938
- DetectionTargetsFormatTransform:
40-
max_targets: 256
4139
output_format: LABEL_CXCYWH
4240

4341
tight_box_rotation: False
@@ -71,7 +69,6 @@ val_dataset_params:
7169
max_value: 255.
7270
- DetectionImagePermute
7371
- DetectionTargetsFormatTransform:
74-
max_targets: 50
7572
input_dim: [640, 640]
7673
output_format: LABEL_CXCYWH
7774
tight_box_rotation: False

src/super_gradients/recipes/dataset_params/pascal_voc_detection_dataset_params.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ train_dataset_params:
77
- DetectionPaddedRescale:
88
input_dim: ${dataset_params.train_dataset_params.input_dim}
99
- DetectionTargetsFormatTransform:
10-
max_targets: 50
1110
input_dim: ${dataset_params.train_dataset_params.input_dim}
1211
output_format: LABEL_CXCYWH
1312
class_inclusion_list:
@@ -23,7 +22,6 @@ val_dataset_params:
2322
- DetectionPaddedRescale:
2423
input_dim: ${dataset_params.train_dataset_params.input_dim}
2524
- DetectionTargetsFormatTransform:
26-
max_targets: 50
2725
input_dim: ${dataset_params.train_dataset_params.input_dim}
2826
output_format: LABEL_CXCYWH
2927
images_sub_directory: images/test2007/

src/super_gradients/recipes/dataset_params/roboflow_detection_dataset_params.yaml

-4
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,9 @@ train_dataset_params:
3939
prob: 0.5 # probability to apply horizontal flip
4040
- DetectionPaddedRescale:
4141
input_dim: ${dataset_params.train_dataset_params.input_dim}
42-
max_targets: 300
4342
- DetectionStandardize:
4443
max_value: 255.
4544
- DetectionTargetsFormatTransform:
46-
max_targets: 300
4745
input_dim: ${dataset_params.train_dataset_params.input_dim}
4846
output_format: LABEL_CXCYWH
4947
tight_box_rotation: False
@@ -76,12 +74,10 @@ val_dataset_params:
7674
transforms:
7775
- DetectionPaddedRescale:
7876
input_dim: ${dataset_params.val_dataset_params.input_dim}
79-
max_targets: 300
8077
pad_value: 114
8178
- DetectionStandardize:
8279
max_value: 255.
8380
- DetectionTargetsFormatTransform:
84-
max_targets: 300
8581
input_dim: ${dataset_params.val_dataset_params.input_dim}
8682
output_format: LABEL_CXCYWH
8783
tight_box_rotation: False

src/super_gradients/training/transforms/transforms.py

+23-22
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import collections
22
import math
33
import random
4+
import warnings
45
from numbers import Number
56
from typing import Optional, Union, Tuple, List, Sequence, Dict
67

@@ -776,18 +777,20 @@ class DetectionPaddedRescale(DetectionTransform):
776777
Preprocessing transform to be applied last of all transforms for validation.
777778
778779
Image- Rescales and pads to self.input_dim.
779-
Targets- pads targets to max_targets, moves the class label to first index, converts boxes format- xyxy -> cxcywh.
780+
Targets- moves the class label to first index, converts boxes format- xyxy -> cxcywh.
780781
781782
:param input_dim: Final input dimension (default=(640,640))
782783
:param swap: Image axis's to be rearranged.
783-
:param max_targets:
784784
:param pad_value: Padding value for image.
785785
"""
786786

787-
def __init__(self, input_dim: Union[int, Tuple[int, int], None], swap: Tuple[int, ...] = (2, 0, 1), max_targets: int = 50, pad_value: int = 114):
787+
def __init__(
788+
self, input_dim: Union[int, Tuple[int, int], None], swap: Tuple[int, ...] = (2, 0, 1), max_targets: Optional[int] = None, pad_value: int = 114
789+
):
790+
super().__init__()
791+
_max_targets_deprication(max_targets)
788792
self.swap = swap
789793
self.input_dim = ensure_is_tuple_of_two(input_dim)
790-
self.max_targets = max_targets
791794
self.pad_value = pad_value
792795

793796
def __call__(self, sample: dict) -> dict:
@@ -814,20 +817,18 @@ class DetectionHorizontalFlip(DetectionTransform):
814817
Horizontal Flip for Detection
815818
816819
:param prob: Probability of applying horizontal flip
817-
:param max_targets: Max objects in single image, padding target to this size in case of empty image.
818820
"""
819821

820-
def __init__(self, prob: float, max_targets: int = 120):
822+
def __init__(self, prob: float, max_targets: Optional[int] = None):
821823
super(DetectionHorizontalFlip, self).__init__()
824+
_max_targets_deprication(max_targets)
822825
self.prob = prob
823-
self.max_targets = max_targets
824826

825827
def __call__(self, sample):
826828
image, targets = sample["image"], sample["target"]
829+
if len(targets) == 0:
830+
targets = np.zeros((0, 5), dtype=np.float32)
827831
boxes = targets[:, :4]
828-
if len(boxes) == 0:
829-
targets = np.zeros((self.max_targets, 5), dtype=np.float32)
830-
boxes = targets[:, :4]
831832
image, boxes = _mirror(image, boxes, self.prob)
832833
targets[:, :4] = boxes
833834
sample["target"] = targets
@@ -1012,7 +1013,6 @@ class DetectionTargetsFormatTransform(DetectionTransform):
10121013
:param input_format: Format of the input targets. For instance [xmin, ymin, xmax, ymax, cls_id] refers to XYXY_LABEL.
10131014
:param output_format: Format of the output targets. For instance [xmin, ymin, xmax, ymax, cls_id] refers to XYXY_LABEL
10141015
:param min_bbox_edge_size: bboxes with edge size lower then this values will be removed.
1015-
:param max_targets: Max objects in single image, padding target to this size.
10161016
"""
10171017

10181018
@resolve_param("input_format", ConcatenatedTensorFormatFactory())
@@ -1023,9 +1023,10 @@ def __init__(
10231023
input_format: ConcatenatedTensorFormat = XYXY_LABEL,
10241024
output_format: ConcatenatedTensorFormat = LABEL_CXCYWH,
10251025
min_bbox_edge_size: float = 1,
1026-
max_targets: int = 120,
1026+
max_targets: Optional[int] = None,
10271027
):
10281028
super(DetectionTargetsFormatTransform, self).__init__()
1029+
_max_targets_deprication(max_targets)
10291030
if isinstance(input_format, DetectionTargetsFormat) or isinstance(output_format, DetectionTargetsFormat):
10301031
raise TypeError(
10311032
"DetectionTargetsFormat is not supported for input_format and output_format starting from super_gradients==3.0.7.\n"
@@ -1035,7 +1036,6 @@ def __init__(
10351036
)
10361037
self.input_format = input_format
10371038
self.output_format = output_format
1038-
self.max_targets = max_targets
10391039
self.min_bbox_edge_size = min_bbox_edge_size
10401040
self.input_dim = None
10411041

@@ -1066,8 +1066,7 @@ def apply_on_targets(self, targets: np.ndarray) -> np.ndarray:
10661066
"""Convert targets in input_format to output_format, filter small bboxes and pad targets"""
10671067
targets = self.targets_format_converter(targets)
10681068
targets = self.filter_small_bboxes(targets)
1069-
targets = self.pad_targets(targets)
1070-
return targets
1069+
return np.ascontiguousarray(targets, dtype=np.float32)
10711070

10721071
def filter_small_bboxes(self, targets: np.ndarray) -> np.ndarray:
10731072
"""Filter bboxes smaller than specified threshold."""
@@ -1078,13 +1077,6 @@ def _is_big_enough(bboxes: np.ndarray) -> np.ndarray:
10781077
targets = filter_on_bboxes(fn=_is_big_enough, tensor=targets, tensor_format=self.output_format)
10791078
return targets
10801079

1081-
def pad_targets(self, targets: np.ndarray) -> np.ndarray:
1082-
"""Pad targets."""
1083-
padded_targets = np.zeros((self.max_targets, targets.shape[-1]))
1084-
padded_targets[range(len(targets))[: self.max_targets]] = targets[: self.max_targets]
1085-
padded_targets = np.ascontiguousarray(padded_targets, dtype=np.float32)
1086-
return padded_targets
1087-
10881080
def get_equivalent_preprocessing(self) -> List:
10891081
return []
10901082

@@ -1331,3 +1323,12 @@ def __init__(self, max_val=255.0):
13311323

13321324
def forward(self, img):
13331325
return img / self.max_val
1326+
1327+
1328+
def _max_targets_deprication(max_targets: Optional[int] = None):
1329+
if max_targets is not None:
1330+
warnings.warn(
1331+
"max_targets is deprecated and will be removed in the future, targets are not padded to the max length anymore. "
1332+
"If you are using collate_fn provided by SG, it is safe to simply drop this argument.",
1333+
DeprecationWarning,
1334+
)

0 commit comments

Comments (0)