1
1
import collections
2
2
import math
3
3
import random
4
+ import warnings
4
5
from numbers import Number
5
6
from typing import Optional , Union , Tuple , List , Sequence , Dict
6
7
@@ -776,18 +777,20 @@ class DetectionPaddedRescale(DetectionTransform):
776
777
Preprocessing transform to be applied last of all transforms for validation.
777
778
778
779
Image- Rescales and pads to self.input_dim.
779
- Targets- pads targets to max_targets, moves the class label to first index, converts boxes format- xyxy -> cxcywh.
780
+ Targets- moves the class label to first index, converts boxes format- xyxy -> cxcywh.
780
781
781
782
:param input_dim: Final input dimension (default=(640,640))
782
783
:param swap: Image axes to be rearranged.
783
- :param max_targets:
784
784
:param pad_value: Padding value for image.
785
785
"""
786
786
787
- def __init__ (self , input_dim : Union [int , Tuple [int , int ], None ], swap : Tuple [int , ...] = (2 , 0 , 1 ), max_targets : int = 50 , pad_value : int = 114 ):
787
+ def __init__ (
788
+ self , input_dim : Union [int , Tuple [int , int ], None ], swap : Tuple [int , ...] = (2 , 0 , 1 ), max_targets : Optional [int ] = None , pad_value : int = 114
789
+ ):
790
+ super ().__init__ ()
791
+ _max_targets_deprication (max_targets )
788
792
self .swap = swap
789
793
self .input_dim = ensure_is_tuple_of_two (input_dim )
790
- self .max_targets = max_targets
791
794
self .pad_value = pad_value
792
795
793
796
def __call__ (self , sample : dict ) -> dict :
@@ -814,20 +817,18 @@ class DetectionHorizontalFlip(DetectionTransform):
814
817
Horizontal Flip for Detection
815
818
816
819
:param prob: Probability of applying horizontal flip
817
- :param max_targets: Max objects in single image, padding target to this size in case of empty image.
818
820
"""
819
821
820
- def __init__ (self , prob : float , max_targets : int = 120 ):
822
+ def __init__ (self , prob : float , max_targets : Optional [ int ] = None ):
821
823
super (DetectionHorizontalFlip , self ).__init__ ()
824
+ _max_targets_deprication (max_targets )
822
825
self .prob = prob
823
- self .max_targets = max_targets
824
826
825
827
def __call__ (self , sample ):
826
828
image , targets = sample ["image" ], sample ["target" ]
829
+ if len (targets ) == 0 :
830
+ targets = np .zeros ((0 , 5 ), dtype = np .float32 )
827
831
boxes = targets [:, :4 ]
828
- if len (boxes ) == 0 :
829
- targets = np .zeros ((self .max_targets , 5 ), dtype = np .float32 )
830
- boxes = targets [:, :4 ]
831
832
image , boxes = _mirror (image , boxes , self .prob )
832
833
targets [:, :4 ] = boxes
833
834
sample ["target" ] = targets
@@ -1012,7 +1013,6 @@ class DetectionTargetsFormatTransform(DetectionTransform):
1012
1013
:param input_format: Format of the input targets. For instance [xmin, ymin, xmax, ymax, cls_id] refers to XYXY_LABEL.
1013
1014
:param output_format: Format of the output targets. For instance [xmin, ymin, xmax, ymax, cls_id] refers to XYXY_LABEL
1014
1015
:param min_bbox_edge_size: bboxes with an edge size lower than this value will be removed.
1015
- :param max_targets: Max objects in single image, padding target to this size.
1016
1016
"""
1017
1017
1018
1018
@resolve_param ("input_format" , ConcatenatedTensorFormatFactory ())
@@ -1023,9 +1023,10 @@ def __init__(
1023
1023
input_format : ConcatenatedTensorFormat = XYXY_LABEL ,
1024
1024
output_format : ConcatenatedTensorFormat = LABEL_CXCYWH ,
1025
1025
min_bbox_edge_size : float = 1 ,
1026
- max_targets : int = 120 ,
1026
+ max_targets : Optional [ int ] = None ,
1027
1027
):
1028
1028
super (DetectionTargetsFormatTransform , self ).__init__ ()
1029
+ _max_targets_deprication (max_targets )
1029
1030
if isinstance (input_format , DetectionTargetsFormat ) or isinstance (output_format , DetectionTargetsFormat ):
1030
1031
raise TypeError (
1031
1032
"DetectionTargetsFormat is not supported for input_format and output_format starting from super_gradients==3.0.7.\n "
@@ -1035,7 +1036,6 @@ def __init__(
1035
1036
)
1036
1037
self .input_format = input_format
1037
1038
self .output_format = output_format
1038
- self .max_targets = max_targets
1039
1039
self .min_bbox_edge_size = min_bbox_edge_size
1040
1040
self .input_dim = None
1041
1041
@@ -1066,8 +1066,7 @@ def apply_on_targets(self, targets: np.ndarray) -> np.ndarray:
1066
1066
"""Convert targets in input_format to output_format, filter small bboxes and return a contiguous float32 array"""
1067
1067
targets = self .targets_format_converter (targets )
1068
1068
targets = self .filter_small_bboxes (targets )
1069
- targets = self .pad_targets (targets )
1070
- return targets
1069
+ return np .ascontiguousarray (targets , dtype = np .float32 )
1071
1070
1072
1071
def filter_small_bboxes (self , targets : np .ndarray ) -> np .ndarray :
1073
1072
"""Filter bboxes smaller than specified threshold."""
@@ -1078,13 +1077,6 @@ def _is_big_enough(bboxes: np.ndarray) -> np.ndarray:
1078
1077
targets = filter_on_bboxes (fn = _is_big_enough , tensor = targets , tensor_format = self .output_format )
1079
1078
return targets
1080
1079
1081
- def pad_targets (self , targets : np .ndarray ) -> np .ndarray :
1082
- """Pad targets."""
1083
- padded_targets = np .zeros ((self .max_targets , targets .shape [- 1 ]))
1084
- padded_targets [range (len (targets ))[: self .max_targets ]] = targets [: self .max_targets ]
1085
- padded_targets = np .ascontiguousarray (padded_targets , dtype = np .float32 )
1086
- return padded_targets
1087
-
1088
1080
def get_equivalent_preprocessing (self ) -> List :
1089
1081
return []
1090
1082
@@ -1331,3 +1323,12 @@ def __init__(self, max_val=255.0):
1331
1323
1332
1324
def forward (self , img ):
1333
1325
return img / self .max_val
1326
+
1327
+
1328
def _max_targets_deprication(max_targets: Optional[int] = None):
    """Emit a DeprecationWarning when the deprecated ``max_targets`` argument is supplied.

    Intended to be called from a transform's ``__init__`` with the value it
    received for ``max_targets``.

    :param max_targets: Value passed by the caller for ``max_targets``. ``None`` means the argument
                        was not supplied, in which case nothing is emitted.
    """
    if max_targets is None:
        return

    warnings.warn(
        "max_targets is deprecated and will be removed in the future, targets are not padded to the max length anymore. "
        "If you are using collate_fn provided by SG, it is safe to simply drop this argument.",
        DeprecationWarning,
        # Frame 1 is this helper, frame 2 is the transform's __init__, frame 3 is the code that
        # instantiated the transform. Attributing the warning there makes it point at the call
        # that actually passes max_targets, and lets Python's default filters (which only show
        # DeprecationWarning attributed to __main__) display it for user scripts.
        stacklevel=3,
    )
0 commit comments