conditional skip Doctests #759

Merged · 28 commits · Jan 17, 2022
Changes from 9 commits
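The recurring change in this PR swaps per-line `# doctest: +SKIP` directives for a module-level `__doctest_skip__` list that is populated only when an optional dependency is missing. Below is a minimal sketch of the pattern, assuming the pytest-doctestplus plugin, which honors `__doctest_skip__` when collecting doctests (e.g. `pytest --doctest-modules` with the plugin enabled); the `_NLTK_AVAILABLE` flag stands in for the helpers in `torchmetrics.utilities.imports`:

    from importlib.util import find_spec

    # Availability flag in the spirit of torchmetrics.utilities.imports.
    _NLTK_AVAILABLE = find_spec("nltk") is not None

    if not _NLTK_AVAILABLE:
        # pytest-doctestplus reads this module-level list and skips the
        # doctests of the named objects, so the examples stay clean and
        # still execute whenever the dependency is installed.
        __doctest_skip__ = ["rouge_score"]

Note that plain `doctest.testmod()` ignores `__doctest_skip__`; the name is only meaningful to the plugin's collector.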
39 changes: 19 additions & 20 deletions torchmetrics/detection/map.py
@@ -26,6 +26,7 @@
from torchvision.ops import box_area, box_convert, box_iou
else:
box_convert = box_iou = box_area = None
__doctest_skip__ = ["MeanAveragePrecision", "MAP"]

log = logging.getLogger(__name__)

@@ -197,25 +198,24 @@ class MeanAveragePrecision(Metric):
... labels=torch.IntTensor([0]),
... )
... ]
>>> metric = MeanAveragePrecision() # doctest: +SKIP
>>> metric.update(preds, target) # doctest: +SKIP
>>> metric = MeanAveragePrecision()
>>> metric.update(preds, target)
>>> from pprint import pprint
>>> pprint(metric.compute()) # doctest: +SKIP
>>> pprint(metric.compute()) # doctest: +NORMALIZE_WHITESPACE
{'map': tensor(0.6000),
'map_50': tensor(1.),
'map_75': tensor(1.),
'map_small': tensor(-1.),
'map_medium': tensor(-1.),
'map_large': tensor(0.6000),
'map_medium': tensor(-1.),
'map_per_class': tensor(-1.),
'map_small': tensor(-1.),
'mar_1': tensor(0.6000),
'mar_10': tensor(0.6000),
'mar_100': tensor(0.6000),
'mar_small': tensor(-1.),
'mar_medium': tensor(-1.),
'mar_100_per_class': tensor(-1.),
'mar_large': tensor(0.6000),
'map_per_class': tensor(-1.),
'mar_100_per_class': tensor(-1.)
}
'mar_medium': tensor(-1.),
'mar_small': tensor(-1.)}

Raises:
ModuleNotFoundError:
@@ -761,25 +761,24 @@ class MAP(MeanAveragePrecision):
... labels=torch.IntTensor([0]),
... )
... ]
>>> metric = MAP() # doctest: +SKIP
>>> metric.update(preds, target) # doctest: +SKIP
>>> metric = MAP()
>>> metric.update(preds, target)
>>> from pprint import pprint
>>> pprint(metric.compute()) # doctest: +SKIP
>>> pprint(metric.compute()) # doctest: +NORMALIZE_WHITESPACE
{'map': tensor(0.6000),
'map_50': tensor(1.),
'map_75': tensor(1.),
'map_small': tensor(-1.),
'map_medium': tensor(-1.),
'map_large': tensor(0.6000),
'map_medium': tensor(-1.),
'map_per_class': tensor(-1.),
'map_small': tensor(-1.),
'mar_1': tensor(0.6000),
'mar_10': tensor(0.6000),
'mar_100': tensor(0.6000),
'mar_small': tensor(-1.),
'mar_medium': tensor(-1.),
'mar_100_per_class': tensor(-1.),
'mar_large': tensor(0.6000),
'map_per_class': tensor(-1.),
'mar_100_per_class': tensor(-1.)
}
'mar_medium': tensor(-1.),
'mar_small': tensor(-1.)}
"""

@deprecated(target=MeanAveragePrecision, deprecated_in="0.7", remove_in="0.8", stream=_future_warning)
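The `# doctest: +NORMALIZE_WHITESPACE` directive that replaces `+SKIP` above lets a multi-line `pprint` expectation match the actual output, since runs of whitespace (including newlines) compare as equal. A stdlib-only sketch:

    import doctest

    def show():
        """
        >>> print({'a': 1, 'b': 2})  # doctest: +NORMALIZE_WHITESPACE
        {'a': 1,
         'b': 2}
        """

    if __name__ == "__main__":
        doctest.testmod()  # passes: the wrapped expectation normalizes to one line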
4 changes: 3 additions & 1 deletion torchmetrics/functional/text/bert.py
@@ -28,6 +28,8 @@

if _TRANSFORMERS_AUTO_AVAILABLE:
from transformers.models.auto import AutoModel, AutoTokenizer
+else:
+    __doctest_skip__ = ["bert_score"]

if _TQDM_AVAILABLE:
import tqdm
@@ -566,7 +568,7 @@ def bert_score(
>>> from torchmetrics.functional.text.bert import bert_score
>>> preds = ["hello there", "general kenobi"]
>>> target = ["hello there", "master kenobi"]
>>> bert_score(preds, target, lang="en") # doctest: +SKIP
>>> bert_score(preds, target, lang="en")
{'precision': [0.99..., 0.99...],
'recall': [0.99..., 0.99...],
'f1': [0.99..., 0.99...]}
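The restored `bert_score` example matches floats loosely with `0.99...`, which only works when the doctest run enables the `ELLIPSIS` option (an assumption here: torchmetrics sets doctest option flags in its pytest configuration). A self-contained illustration:

    import doctest

    def scores():
        """
        >>> [round(0.99871, 5), round(0.99514, 5)]  # doctest: +ELLIPSIS
        [0.99..., 0.99...]
        """

    if __name__ == "__main__":
        doctest.testmod()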
62 changes: 33 additions & 29 deletions torchmetrics/functional/text/rouge.py
@@ -21,6 +21,9 @@

from torchmetrics.utilities.imports import _NLTK_AVAILABLE

+if not _NLTK_AVAILABLE:
+    __doctest_skip__ = ["rouge_score"]

ALLOWED_ROUGE_KEYS: Dict[str, Union[int, str]] = {
"rouge1": 1,
"rouge2": 2,
@@ -188,23 +191,23 @@ def _rouge_score_update(
>>> target = "Is your name John".split()
>>> from pprint import pprint
>>> score = _rouge_score_update(preds, target, rouge_keys_values=[1, 2, 3, 'L'], accumulate='best')
>>> pprint(score) # doctest: +SKIP
>>> pprint(score) # doctest: +NORMALIZE_WHITESPACE
{1: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(1.), 'precision': tensor(1.), 'recall': tensor(1.)}],
2: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}],
3: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}],
'L': [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(1.), 'precision': tensor(1.), 'recall': tensor(1.)}]}
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}],
2: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}],
3: [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}],
'L': [{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)},
{'fmeasure': tensor(0.), 'precision': tensor(0.), 'recall': tensor(0.)}]}
"""
results: Dict[Union[int, str], List[Dict[str, Tensor]]] = {rouge_key: [] for rouge_key in rouge_keys_values}

@@ -316,19 +319,20 @@ def rouge_score(
>>> preds = "My name is John"
>>> target = "Is your name John"
>>> from pprint import pprint
>>> pprint(rouge_score(preds, target)) # doctest: +SKIP
{'rouge1_fmeasure': 0.25,
'rouge1_precision': 0.25,
'rouge1_recall': 0.25,
'rouge2_fmeasure': 0.0,
'rouge2_precision': 0.0,
'rouge2_recall': 0.0,
'rougeL_fmeasure': 0.25,
'rougeL_precision': 0.25,
'rougeL_recall': 0.25,
'rougeLsum_fmeasure': 0.25,
'rougeLsum_precision': 0.25,
'rougeLsum_recall': 0.25}
>>> pprint(rouge_score(preds, target))
{'rouge1_fmeasure': tensor(0.7500),
'rouge1_precision': tensor(0.7500),
'rouge1_recall': tensor(0.7500),
'rouge2_fmeasure': tensor(0.),
'rouge2_precision': tensor(0.),
'rouge2_recall': tensor(0.),
'rougeL_fmeasure': tensor(0.5000),
'rougeL_precision': tensor(0.5000),
'rougeL_recall': tensor(0.5000),
'rougeLsum_fmeasure': tensor(0.5000),
'rougeLsum_precision': tensor(0.5000),
'rougeLsum_recall': tensor(0.5000)}


Raises:
ModuleNotFoundError:
25 changes: 13 additions & 12 deletions torchmetrics/image/fid.py
@@ -27,6 +27,7 @@
if _TORCH_FIDELITY_AVAILABLE:
from torch_fidelity.feature_extractor_inceptionv3 import FeatureExtractorInceptionV3
else:
__doctest_skip__ = ["FrechetInceptionDistance", "FID"]

class FeatureExtractorInceptionV3(torch.nn.Module): # type: ignore
pass
@@ -193,13 +194,13 @@ class FrechetInceptionDistance(Metric):
>>> import torch
>>> _ = torch.manual_seed(123)
>>> from torchmetrics.image.fid import FrechetInceptionDistance
>>> fid = FrechetInceptionDistance(feature=64) # doctest: +SKIP
>>> fid = FrechetInceptionDistance(feature=64)
>>> # generate two slightly overlapping image intensity distributions
>>> imgs_dist1 = torch.randint(0, 200, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> imgs_dist2 = torch.randint(100, 255, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> fid.update(imgs_dist1, real=True) # doctest: +SKIP
>>> fid.update(imgs_dist2, real=False) # doctest: +SKIP
>>> fid.compute() # doctest: +SKIP
>>> imgs_dist1 = torch.randint(0, 200, (100, 3, 299, 299), dtype=torch.uint8)
>>> imgs_dist2 = torch.randint(100, 255, (100, 3, 299, 299), dtype=torch.uint8)
>>> fid.update(imgs_dist1, real=True)
>>> fid.update(imgs_dist2, real=False)
>>> fid.compute()
tensor(12.7202)

"""
@@ -295,13 +296,13 @@ class FID(FrechetInceptionDistance):
Example:
>>> import torch
>>> _ = torch.manual_seed(123)
>>> fid = FID(feature=64) # doctest: +SKIP
>>> fid = FID(feature=64)
>>> # generate two slightly overlapping image intensity distributions
>>> imgs_dist1 = torch.randint(0, 200, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> imgs_dist2 = torch.randint(100, 255, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> fid.update(imgs_dist1, real=True) # doctest: +SKIP
>>> fid.update(imgs_dist2, real=False) # doctest: +SKIP
>>> fid.compute() # doctest: +SKIP
>>> imgs_dist1 = torch.randint(0, 200, (100, 3, 299, 299), dtype=torch.uint8)
>>> imgs_dist2 = torch.randint(100, 255, (100, 3, 299, 299), dtype=torch.uint8)
>>> fid.update(imgs_dist1, real=True)
>>> fid.update(imgs_dist2, real=False)
>>> fid.compute()
tensor(12.7202)

"""
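fid.py also shows the companion trick for class-level dependencies: a stub definition keeps the module importable when torch-fidelity is absent, while `__doctest_skip__` silences the examples that would need the real extractor. A condensed sketch (same pytest-doctestplus assumption as above; the availability flag is hard-coded for illustration):

    import torch

    _TORCH_FIDELITY_AVAILABLE = False  # pretend the extra is not installed

    if _TORCH_FIDELITY_AVAILABLE:
        from torch_fidelity.feature_extractor_inceptionv3 import FeatureExtractorInceptionV3
    else:
        __doctest_skip__ = ["FrechetInceptionDistance", "FID"]

        class FeatureExtractorInceptionV3(torch.nn.Module):  # type: ignore
            """Placeholder so annotations and subclassing still resolve."""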
23 changes: 13 additions & 10 deletions torchmetrics/image/inception.py
@@ -23,6 +23,9 @@
from torchmetrics.utilities.data import dim_zero_cat
from torchmetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE

+if not _TORCH_FIDELITY_AVAILABLE:
+    __doctest_skip__ = ["InceptionScore", "IS"]


class InceptionScore(Metric):
r"""
@@ -93,12 +96,12 @@ class InceptionScore(Metric):
>>> import torch
>>> _ = torch.manual_seed(123)
>>> from torchmetrics.image.inception import InceptionScore
>>> inception = InceptionScore() # doctest: +SKIP
>>> inception = InceptionScore()
>>> # generate some images
>>> imgs = torch.randint(0, 255, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> inception.update(imgs) # doctest: +SKIP
>>> inception.compute() # doctest: +SKIP
(tensor(1.0569), tensor(0.0113))
>>> imgs = torch.randint(0, 255, (100, 3, 299, 299), dtype=torch.uint8)
>>> inception.update(imgs)
>>> inception.compute()
(tensor(1.0544), tensor(0.0117))

"""
features: List
@@ -190,12 +193,12 @@ class IS(InceptionScore):
Example:
>>> import torch
>>> _ = torch.manual_seed(123)
>>> inception = IS() # doctest: +SKIP
>>> inception = IS()
>>> # generate some images
>>> imgs = torch.randint(0, 255, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> inception.update(imgs) # doctest: +SKIP
>>> inception.compute() # doctest: +SKIP
(tensor(1.0569), tensor(0.0113))
>>> imgs = torch.randint(0, 255, (100, 3, 299, 299), dtype=torch.uint8)
>>> inception.update(imgs)
>>> inception.compute()
(tensor(1.0544), tensor(0.0117))

"""

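The expected tuple changes from `(tensor(1.0569), tensor(0.0113))` to `(tensor(1.0544), tensor(0.0117))` because these examples now actually run, so their outputs were regenerated under the fixed seed. With `torch.manual_seed` the sampled inputs, and therefore the printed values, are reproducible run to run:

    import torch

    _ = torch.manual_seed(123)
    a = torch.randint(0, 255, (2, 2), dtype=torch.uint8)
    _ = torch.manual_seed(123)
    b = torch.randint(0, 255, (2, 2), dtype=torch.uint8)
    assert torch.equal(a, b)  # identical draws under the same seed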
35 changes: 19 additions & 16 deletions torchmetrics/image/kid.py
@@ -24,6 +24,9 @@
from torchmetrics.utilities.data import dim_zero_cat
from torchmetrics.utilities.imports import _TORCH_FIDELITY_AVAILABLE

+if not _TORCH_FIDELITY_AVAILABLE:
+    __doctest_skip__ = ["KernelInceptionDistance", "KID"]


def maximum_mean_discrepancy(k_xx: Tensor, k_xy: Tensor, k_yy: Tensor) -> Tensor:
"""Adapted from `KID Score`_"""
@@ -152,15 +155,15 @@ class KernelInceptionDistance(Metric):
>>> import torch
>>> _ = torch.manual_seed(123)
>>> from torchmetrics.image.kid import KernelInceptionDistance
>>> kid = KernelInceptionDistance(subset_size=50) # doctest: +SKIP
>>> kid = KernelInceptionDistance(subset_size=50)
>>> # generate two slightly overlapping image intensity distributions
>>> imgs_dist1 = torch.randint(0, 200, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> imgs_dist2 = torch.randint(100, 255, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> kid.update(imgs_dist1, real=True) # doctest: +SKIP
>>> kid.update(imgs_dist2, real=False) # doctest: +SKIP
>>> kid_mean, kid_std = kid.compute() # doctest: +SKIP
>>> print((kid_mean, kid_std)) # doctest: +SKIP
(tensor(0.0338), tensor(0.0025))
>>> imgs_dist1 = torch.randint(0, 200, (100, 3, 299, 299), dtype=torch.uint8)
>>> imgs_dist2 = torch.randint(100, 255, (100, 3, 299, 299), dtype=torch.uint8)
>>> kid.update(imgs_dist1, real=True)
>>> kid.update(imgs_dist2, real=False)
>>> kid_mean, kid_std = kid.compute()
>>> print((kid_mean, kid_std))
(tensor(0.0337), tensor(0.0023))

"""
real_features: List[Tensor]
@@ -288,15 +291,15 @@ class KID(KernelInceptionDistance):
Example:
>>> import torch
>>> _ = torch.manual_seed(123)
>>> kid = KID(subset_size=50) # doctest: +SKIP
>>> kid = KID(subset_size=50)
>>> # generate two slightly overlapping image intensity distributions
>>> imgs_dist1 = torch.randint(0, 200, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> imgs_dist2 = torch.randint(100, 255, (100, 3, 299, 299), dtype=torch.uint8) # doctest: +SKIP
>>> kid.update(imgs_dist1, real=True) # doctest: +SKIP
>>> kid.update(imgs_dist2, real=False) # doctest: +SKIP
>>> kid_mean, kid_std = kid.compute() # doctest: +SKIP
>>> print((kid_mean, kid_std)) # doctest: +SKIP
(tensor(0.0338), tensor(0.0025))
>>> imgs_dist1 = torch.randint(0, 200, (100, 3, 299, 299), dtype=torch.uint8)
>>> imgs_dist2 = torch.randint(100, 255, (100, 3, 299, 299), dtype=torch.uint8)
>>> kid.update(imgs_dist1, real=True)
>>> kid.update(imgs_dist2, real=False)
>>> kid_mean, kid_std = kid.compute()
>>> print((kid_mean, kid_std))
(tensor(0.0337), tensor(0.0023))

"""

6 changes: 4 additions & 2 deletions torchmetrics/text/bert.py
@@ -25,6 +25,8 @@
if _TRANSFORMERS_AUTO_AVAILABLE:
from transformers.models.auto import AutoTokenizer

__doctest_skip__ = ["BERTScore"]


# Default model recommended in the original implementation.
_DEFAULT_MODEL = "roberta-large"
@@ -115,8 +117,8 @@ class BERTScore(Metric):
>>> from torchmetrics.text.bert import BERTScore
>>> preds = ["hello there", "general kenobi"]
>>> target = ["hello there", "master kenobi"]
>>> bertscore = BERTScore()
>>> bertscore(preds, target) # doctest: +SKIP
>>> bert = BERTScore()
>>> bert(preds, target)
{'precision': [0.99..., 0.99...],
'recall': [0.99..., 0.99...],
'f1': [0.99..., 0.99...]}
32 changes: 18 additions & 14 deletions torchmetrics/text/rouge.py
@@ -25,6 +25,9 @@
)
from torchmetrics.utilities.imports import _NLTK_AVAILABLE

+if not _NLTK_AVAILABLE:
+    __doctest_skip__ = ["ROUGEScore"]


class ROUGEScore(Metric):
"""`Calculate Rouge Score`_, used for automatic summarization. This implementation should imitate the behaviour
@@ -55,21 +58,22 @@ class ROUGEScore(Metric):
>>> from torchmetrics.text.rouge import ROUGEScore
>>> preds = "My name is John"
>>> target = "Is your name John"
>>> rouge = ROUGEScore() # doctest: +SKIP
>>> rouge = ROUGEScore()
>>> from pprint import pprint
>>> pprint(rouge(preds, target)) # doctest: +SKIP
{'rouge1_fmeasure': 0.25,
'rouge1_precision': 0.25,
'rouge1_recall': 0.25,
'rouge2_fmeasure': 0.0,
'rouge2_precision': 0.0,
'rouge2_recall': 0.0,
'rougeL_fmeasure': 0.25,
'rougeL_precision': 0.25,
'rougeL_recall': 0.25,
'rougeLsum_fmeasure': 0.25,
'rougeLsum_precision': 0.25,
'rougeLsum_recall': 0.25}
>>> pprint(rouge(preds, target))
{'rouge1_fmeasure': tensor(0.7500),
'rouge1_precision': tensor(0.7500),
'rouge1_recall': tensor(0.7500),
'rouge2_fmeasure': tensor(0.),
'rouge2_precision': tensor(0.),
'rouge2_recall': tensor(0.),
'rougeL_fmeasure': tensor(0.5000),
'rougeL_precision': tensor(0.5000),
'rougeL_recall': tensor(0.5000),
'rougeLsum_fmeasure': tensor(0.5000),
'rougeLsum_precision': tensor(0.5000),
'rougeLsum_recall': tensor(0.5000)}


Raises:
ValueError: