diff --git a/documentation/source/ModelPredictions.md b/documentation/source/ModelPredictions.md index fceca00c2c..5c2fa3dc75 100644 --- a/documentation/source/ModelPredictions.md +++ b/documentation/source/ModelPredictions.md @@ -93,6 +93,7 @@ images_predictions.show(box_thickness=2, show_confidence=True) - `box_thickness`: Thickness of bounding boxes. - `show_confidence`: Whether to show confidence scores on the image. - `color_mapping`: List of tuples representing the colors for each class. +- `class_names`: List of class names to display. Only classes that the model was trained on are supported. By default, show all these classes. ### Save the Images with Detected Objects To save the images with detected objects as separate files, call the `images_predictions.save()` method and specify the output folder. diff --git a/src/super_gradients/training/utils/predict/prediction_results.py b/src/super_gradients/training/utils/predict/prediction_results.py index e55758b083..a4a987756d 100644 --- a/src/super_gradients/training/utils/predict/prediction_results.py +++ b/src/super_gradients/training/utils/predict/prediction_results.py @@ -113,32 +113,42 @@ def draw( target_bboxes: Optional[np.ndarray] = None, target_bboxes_format: Optional[str] = None, target_class_ids: Optional[np.ndarray] = None, + class_names: Optional[List[str]] = None, ) -> np.ndarray: """Draw the predicted bboxes on the image. - :param box_thickness: Thickness of bounding boxes. - :param show_confidence: Whether to show confidence scores on the image. - :param color_mapping: List of tuples representing the colors for each class. - Default is None, which generates a default color mapping based on the number of class names. - - :param target_bboxes: Optional[np.ndarray], ground truth bounding boxes represented as an np.ndarray of shape - (image_i_object_count, 4). When not None, will plot the predictions and the ground truth bounding boxes side - by side (i.e 2 images stitched as one). (default=None). - - :param target_class_ids: Optional[np.ndarray], ground truth target class indices - represented as an np.ndarray of shape (object_count). (default=None). - - :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of ['xyxy','xywh', - 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. Ignored if not - None and target_bboxes is None. + :param box_thickness: Thickness of bounding boxes. + :param show_confidence: Whether to show confidence scores on the image. + :param color_mapping: List of tuples representing the colors for each class. + Default is None, which generates a default color mapping based on the number of class names. + :param target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth bounding boxes. + Can either be an np.ndarray of shape (image_i_object_count, 4) when predicting a single image, + or a list of length len(target_bboxes), containing such arrays. + When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one) + :param target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth target class indices. Can either be an np.ndarray of shape + (image_i_object_count) when predicting a single image, or a list of length len(target_bboxes), containing such arrays. + :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of + ['xyxy','xywh', 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. + Will raise an error if not None and target_bboxes is None. + :param class_names: List of class names to show. By default, is None which shows all classes using during training. :return: Image with predicted bboxes. Note that this does not modify the original image. - """ image = self.image.copy() target_bboxes = target_bboxes if target_bboxes is not None else np.zeros((0, 4)) target_class_ids = target_class_ids if target_class_ids is not None else np.zeros((0, 1)) + + class_names_to_show = class_names if class_names else self.class_names + class_ids_to_show = [i for i, class_name in enumerate(self.class_names) if class_name in class_names_to_show] + invalid_class_names_to_show = set(class_names_to_show) - set(self.class_names) + if len(invalid_class_names_to_show) > 0: + raise ValueError( + "`class_names` includes class names that the model was not trained on.\n" + f" - Invalid class names: {list(invalid_class_names_to_show)}\n" + f" - Available class names: {list(self.class_names)}" + ) + bbox_format_factory = BBoxFormatFactory() if len(target_bboxes): target_bboxes_xyxy = convert_bboxes( @@ -155,33 +165,36 @@ def draw( color_mapping = color_mapping or generate_color_mapping(len(self.class_names)) for pred_i in np.argsort(self.prediction.confidence): + class_id = int(self.prediction.labels[pred_i]) - score = "" if not show_confidence else str(round(self.prediction.confidence[pred_i], 2)) - image = draw_bbox( - image=image, - title=f"{self.class_names[class_id]} {score}", - color=color_mapping[class_id], - box_thickness=box_thickness, - x1=int(self.prediction.bboxes_xyxy[pred_i, 0]), - y1=int(self.prediction.bboxes_xyxy[pred_i, 1]), - x2=int(self.prediction.bboxes_xyxy[pred_i, 2]), - y2=int(self.prediction.bboxes_xyxy[pred_i, 3]), - ) + if class_id in class_ids_to_show: + score = "" if not show_confidence else str(round(self.prediction.confidence[pred_i], 2)) + image = draw_bbox( + image=image, + title=f"{self.class_names[class_id]} {score}", + color=color_mapping[class_id], + box_thickness=box_thickness, + x1=int(self.prediction.bboxes_xyxy[pred_i, 0]), + y1=int(self.prediction.bboxes_xyxy[pred_i, 1]), + x2=int(self.prediction.bboxes_xyxy[pred_i, 2]), + y2=int(self.prediction.bboxes_xyxy[pred_i, 3]), + ) if plot_targets: target_image = self.image.copy() for target_idx in range(len(target_bboxes_xyxy)): class_id = int(target_class_ids[target_idx]) - target_image = draw_bbox( - image=target_image, - title=f"{self.class_names[class_id]}", - color=color_mapping[class_id], - box_thickness=box_thickness, - x1=int(target_bboxes_xyxy[target_idx, 0]), - y1=int(target_bboxes_xyxy[target_idx, 1]), - x2=int(target_bboxes_xyxy[target_idx, 2]), - y2=int(target_bboxes_xyxy[target_idx, 3]), - ) + if class_id in class_ids_to_show: + target_image = draw_bbox( + image=target_image, + title=f"{self.class_names[class_id]}", + color=color_mapping[class_id], + box_thickness=box_thickness, + x1=int(target_bboxes_xyxy[target_idx, 0]), + y1=int(target_bboxes_xyxy[target_idx, 1]), + x2=int(target_bboxes_xyxy[target_idx, 2]), + y2=int(target_bboxes_xyxy[target_idx, 3]), + ) height, width, ch = target_image.shape new_width, new_height = int(width + width / 20), int(height + height / 8) @@ -210,25 +223,25 @@ def show( target_bboxes: Optional[np.ndarray] = None, target_bboxes_format: Optional[str] = None, target_class_ids: Optional[np.ndarray] = None, + class_names: Optional[List[str]] = None, ) -> None: """Display the image with predicted bboxes. - :param box_thickness: Thickness of bounding boxes. - :param show_confidence: Whether to show confidence scores on the image. - :param color_mapping: List of tuples representing the colors for each class. - Default is None, which generates a default color mapping based on the number of class names. - - :param target_bboxes: Optional[np.ndarray], ground truth bounding boxes represented as an np.ndarray of shape - (image_i_object_count, 4). When not None, will plot the predictions and the ground truth bounding boxes side - by side (i.e 2 images stitched as one). (default=None). - - :param target_class_ids: Optional[np.ndarray], ground truth target class indices - represented as an np.ndarray of shape (object_count). (default=None). - - :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of ['xyxy','xywh', - 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. Ignored if not - None and target_bboxes is None. + :param box_thickness: Thickness of bounding boxes. + :param show_confidence: Whether to show confidence scores on the image. + :param color_mapping: List of tuples representing the colors for each class. + Default is None, which generates a default color mapping based on the number of class names. + :param target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth bounding boxes. + Can either be an np.ndarray of shape (image_i_object_count, 4) when predicting a single image, + or a list of length len(target_bboxes), containing such arrays. + When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one) + :param target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth target class indices. Can either be an np.ndarray of shape + (image_i_object_count) when predicting a single image, or a list of length len(target_bboxes), containing such arrays. + :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of + ['xyxy','xywh', 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. + Will raise an error if not None and target_bboxes is None. + :param class_names: List of class names to show. By default, is None which shows all classes using during training. """ image = self.draw( box_thickness=box_thickness, @@ -237,6 +250,7 @@ def show( target_bboxes=target_bboxes, target_bboxes_format=target_bboxes_format, target_class_ids=target_class_ids, + class_names=class_names, ) show_image(image) @@ -249,25 +263,25 @@ def save( target_bboxes: Optional[np.ndarray] = None, target_bboxes_format: Optional[str] = None, target_class_ids: Optional[np.ndarray] = None, + class_names: Optional[List[str]] = None, ) -> None: """Save the predicted bboxes on the images. - :param output_path: Path to the output video file. - :param box_thickness: Thickness of bounding boxes. - :param show_confidence: Whether to show confidence scores on the image. - :param color_mapping: List of tuples representing the colors for each class. - Default is None, which generates a default color mapping based on the number of class names. - - :param target_bboxes: Optional[np.ndarray], ground truth bounding boxes represented as an np.ndarray of shape - (image_i_object_count, 4). When not None, will plot the predictions and the ground truth bounding boxes side - by side (i.e 2 images stitched as one). (default=None). - - :param target_class_ids: Optional[np.ndarray], ground truth target class indices - represented as an np.ndarray of shape (object_count). (default=None). - - :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of ['xyxy','xywh', - 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. Ignored if not - None and target_bboxes is None. + :param output_path: Path to the output video file. + :param box_thickness: Thickness of bounding boxes. + :param show_confidence: Whether to show confidence scores on the image. + :param color_mapping: List of tuples representing the colors for each class. + Default is None, which generates a default color mapping based on the number of class names. + :param target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth bounding boxes. + Can either be an np.ndarray of shape (image_i_object_count, 4) when predicting a single image, + or a list of length len(target_bboxes), containing such arrays. + When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one) + :param target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth target class indices. Can either be an np.ndarray of shape + (image_i_object_count) when predicting a single image, or a list of length len(target_bboxes), containing such arrays. + :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of + ['xyxy','xywh', 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. + Will raise an error if not None and target_bboxes is None. + :param class_names: List of class names to show. By default, is None which shows all classes using during training. """ image = self.draw( box_thickness=box_thickness, @@ -276,6 +290,7 @@ def save( target_bboxes=target_bboxes, target_bboxes_format=target_bboxes_format, target_class_ids=target_class_ids, + class_names=class_names, ) save_image(image=image, path=output_path) @@ -378,23 +393,24 @@ def show( target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]] = None, target_bboxes_format: Optional[str] = None, target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]] = None, + class_names: Optional[List[str]] = None, ) -> None: """Display the predicted bboxes on the images. - :param box_thickness: Thickness of bounding boxes. - :param show_confidence: Whether to show confidence scores on the image. - :param color_mapping: List of tuples representing the colors for each class. - Default is None, which generates a default color mapping based on the number of class names. - :param target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth bounding boxes. Can either be an np.ndarray of shape - (image_i_object_count, 4) when predicting a single image, or a list of length len(target_bboxes), containing such arrays. - When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one). - - :param target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth target class indices. Can either be an np.ndarray of shape - (image_i_object_count) when predicting a single image, or a list of length len(target_bboxes), containing such arrays (default=None). - - :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of ['xyxy','xywh', - 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. Will raise an - error if not None and target_bboxes is None. + :param box_thickness: Thickness of bounding boxes. + :param show_confidence: Whether to show confidence scores on the image. + :param color_mapping: List of tuples representing the colors for each class. + Default is None, which generates a default color mapping based on the number of class names. + :param target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth bounding boxes. + Can either be an np.ndarray of shape (image_i_object_count, 4) when predicting a single image, + or a list of length len(target_bboxes), containing such arrays. + When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one) + :param target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth target class indices. Can either be an np.ndarray of shape + (image_i_object_count) when predicting a single image, or a list of length len(target_bboxes), containing such arrays. + :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of + ['xyxy','xywh', 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. + Will raise an error if not None and target_bboxes is None. + :param class_names: List of class names to show. By default, is None which shows all classes using during training. """ target_bboxes, target_class_ids = self._check_target_args(target_bboxes, target_bboxes_format, target_class_ids) @@ -406,6 +422,7 @@ def show( target_bboxes=target_bbox, target_bboxes_format=target_bboxes_format, target_class_ids=target_class_id, + class_names=class_names, ) def _check_target_args( @@ -447,24 +464,25 @@ def save( target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]] = None, target_bboxes_format: Optional[str] = None, target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]] = None, + class_names: Optional[List[str]] = None, ) -> None: """Save the predicted bboxes on the images. - :param output_folder: Folder path, where the images will be saved. - :param box_thickness: Thickness of bounding boxes. - :param show_confidence: Whether to show confidence scores on the image. - :param color_mapping: List of tuples representing the colors for each class. - Default is None, which generates a default color mapping based on the number of class names. - :param target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth bounding boxes. Can either be an np.ndarray of shape - (image_i_object_count, 4) when predicting a single image, or a list of length len(target_bboxes), containing such arrays. - When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one). - - :param target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth target class indices. Can either be an np.ndarray of shape - (image_i_object_count) when predicting a single image, or a list of length len(target_bboxes), containing such arrays (default=None). - - :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of ['xyxy','xywh', - 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. Will raise an - error if not None and target_bboxes is None. + :param output_folder: Folder path, where the images will be saved. + :param box_thickness: Thickness of bounding boxes. + :param show_confidence: Whether to show confidence scores on the image. + :param color_mapping: List of tuples representing the colors for each class. + Default is None, which generates a default color mapping based on the number of class names. + :param target_bboxes: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth bounding boxes. + Can either be an np.ndarray of shape (image_i_object_count, 4) when predicting a single image, + or a list of length len(target_bboxes), containing such arrays. + When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one) + :param target_class_ids: Optional[Union[np.ndarray, List[np.ndarray]]], ground truth target class indices. Can either be an np.ndarray of shape + (image_i_object_count) when predicting a single image, or a list of length len(target_bboxes), containing such arrays. + :param target_bboxes_format: Optional[str], bounding box format of target_bboxes, one of + ['xyxy','xywh', 'yxyx' 'cxcywh' 'normalized_xyxy' 'normalized_xywh', 'normalized_yxyx', 'normalized_cxcywh']. + Will raise an error if not None and target_bboxes is None. + :param class_names: List of class names to show. By default, is None which shows all classes using during training. """ if output_folder: os.makedirs(output_folder, exist_ok=True) @@ -473,7 +491,13 @@ def save( for i, (prediction, target_bbox, target_class_id) in enumerate(zip(self._images_prediction_lst, target_bboxes, target_class_ids)): image_output_path = os.path.join(output_folder, f"pred_{i}.jpg") - prediction.save(output_path=image_output_path, box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping) + prediction.save( + output_path=image_output_path, + box_thickness=box_thickness, + show_confidence=show_confidence, + color_mapping=color_mapping, + class_names=class_names, + ) @dataclass @@ -487,32 +511,59 @@ class VideoDetectionPrediction(VideoPredictions): _images_prediction_lst: List[ImageDetectionPrediction] fps: int - def draw(self, box_thickness: int = 2, show_confidence: bool = True, color_mapping: Optional[List[Tuple[int, int, int]]] = None) -> List[np.ndarray]: + def draw( + self, + box_thickness: int = 2, + show_confidence: bool = True, + color_mapping: Optional[List[Tuple[int, int, int]]] = None, + class_names: Optional[List[str]] = None, + ) -> List[np.ndarray]: """Draw the predicted bboxes on the images. :param box_thickness: Thickness of bounding boxes. :param show_confidence: Whether to show confidence scores on the image. :param color_mapping: List of tuples representing the colors for each class. Default is None, which generates a default color mapping based on the number of class names. + :param class_names: List of class names to show. By default, is None which shows all classes using during training. :return: List of images with predicted bboxes. Note that this does not modify the original image. """ frames_with_bbox = [ - result.draw(box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping) for result in self._images_prediction_lst + result.draw( + box_thickness=box_thickness, + show_confidence=show_confidence, + color_mapping=color_mapping, + class_names=class_names, + ) + for result in self._images_prediction_lst ] return frames_with_bbox - def show(self, box_thickness: int = 2, show_confidence: bool = True, color_mapping: Optional[List[Tuple[int, int, int]]] = None) -> None: + def show( + self, + box_thickness: int = 2, + show_confidence: bool = True, + color_mapping: Optional[List[Tuple[int, int, int]]] = None, + class_names: Optional[List[str]] = None, + ) -> None: """Display the predicted bboxes on the images. :param box_thickness: Thickness of bounding boxes. :param show_confidence: Whether to show confidence scores on the image. :param color_mapping: List of tuples representing the colors for each class. Default is None, which generates a default color mapping based on the number of class names. + :param class_names: List of class names to show. By default, is None which shows all classes using during training. """ - frames = self.draw(box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping) + frames = self.draw(box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping, class_names=class_names) show_video_from_frames(window_name="Detection", frames=frames, fps=self.fps) - def save(self, output_path: str, box_thickness: int = 2, show_confidence: bool = True, color_mapping: Optional[List[Tuple[int, int, int]]] = None) -> None: + def save( + self, + output_path: str, + box_thickness: int = 2, + show_confidence: bool = True, + color_mapping: Optional[List[Tuple[int, int, int]]] = None, + class_names: Optional[List[str]] = None, + ) -> None: """Save the predicted bboxes on the images. :param output_path: Path to the output video file. @@ -520,6 +571,7 @@ def save(self, output_path: str, box_thickness: int = 2, show_confidence: bool = :param show_confidence: Whether to show confidence scores on the image. :param color_mapping: List of tuples representing the colors for each class. Default is None, which generates a default color mapping based on the number of class names. + :param class_names: List of class names to show. By default, is None which shows all classes using during training. """ - frames = self.draw(box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping) + frames = self.draw(box_thickness=box_thickness, show_confidence=show_confidence, color_mapping=color_mapping, class_names=class_names) save_video(output_path=output_path, frames=frames, fps=self.fps) diff --git a/src/super_gradients/training/utils/predict/predictions.py b/src/super_gradients/training/utils/predict/predictions.py index 04835e5106..7e57bb8011 100644 --- a/src/super_gradients/training/utils/predict/predictions.py +++ b/src/super_gradients/training/utils/predict/predictions.py @@ -30,7 +30,7 @@ def __init__(self, bboxes: np.ndarray, bbox_format: str, confidence: np.ndarray, :param image_shape: Shape of the image the prediction is made on, (H, W). This is used to convert bboxes to xyxy format :param target_bboxes: np.ndarray, ground truth bounding boxes as np.ndarray of shape (image_i_object_count, 4) - When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one). + When not None, will plot the predictions and the ground truth bounding boxes side by side (i.e 2 images stitched as one) :param target_labels: np.ndarray, ground truth target class indices as an np.ndarray of shape (image_i_object_count). diff --git a/tests/unit_tests/test_predict.py b/tests/unit_tests/test_predict.py index b20039a714..65ed786ea3 100644 --- a/tests/unit_tests/test_predict.py +++ b/tests/unit_tests/test_predict.py @@ -74,6 +74,16 @@ def test_detection_models_with_targets(self): target_bboxes_format="xyxy", ) + def test_predict_class_names(self): + for model_name in [Models.YOLO_NAS_S, Models.YOLOX_S, Models.PP_YOLOE_S]: + model = models.get(model_name, pretrained_weights="coco") + + predictions = model.predict(self.np_array_images) + _ = predictions.show(class_names=["person", "bicycle", "car", "motorcycle", "airplane", "bus"]) + + with self.assertRaises(ValueError): + _ = predictions.show(class_names=["human"]) + if __name__ == "__main__": unittest.main()