conditional skip Doctests (#759)
* doctest_skip
* doctest_requires
* cfg & flags
* Apply suggestions from code review
* ELLIPSIS

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Daniel Stancl <[email protected]>
3 people authored Jan 17, 2022
1 parent e87a85c commit e95c717
Showing 27 changed files with 175 additions and 144 deletions.
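Background on the mechanism these diffs rely on: the commit replaces unconditional `# doctest: +SKIP` directives with pytest-doctestplus, which skips doctests based on two module-level variables. A minimal sketch of the convention, using a hypothetical module for illustration (not torchmetrics code):

```python
# Sketch of the pytest-doctestplus conventions this commit adopts.

# Skip the doctests of these objects unconditionally:
__doctest_skip__ = ["broken_function"]

# Collect these doctests only when the listed packages are importable;
# keys are tuples of object names, values are required package names:
__doctest_requires__ = {("fast_metric",): ["pesq"]}


def fast_metric(x: float) -> float:
    """Toy metric whose doctest runs only if `pesq` is installed.

    Example:
        >>> fast_metric(2.0)
        4.0
    """
    return x * x
```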
14 changes: 3 additions & 11 deletions .github/workflows/ci_test-base.yml
@@ -19,7 +19,6 @@ jobs:
env:
PYTEST_ARTEFACT: pytest-results-${{ matrix.os }}-${{ matrix.python-version }}
PYTORCH_URL: https://download.pytorch.org/whl/cpu/torch_stable.html
TRANSFORMERS_CACHE: .cache/huggingface/

# Timeout: https://stackoverflow.com/a/59076067/4521646
timeout-minutes: 20
@@ -52,27 +51,20 @@ jobs:
uses: actions/cache@v2
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-pip-py${{ matrix.python-version }}-${{ hashFiles('requirements.txt') }}
key: ${{ runner.os }}-tiny-pip-py${{ matrix.python-version }}-${{ hashFiles('requirements.txt') }}
restore-keys: |
${{ runner.os }}-pip-py${{ matrix.python-version }}-
${{ runner.os }}-tiny-pip-py${{ matrix.python-version }}-
- name: Install dependencies
run: |
python --version
pip --version
pip install --requirement requirements.txt --upgrade --find-links $PYTORCH_URL
python ./requirements/adjust-versions.py requirements/image.txt
pip install --requirement requirements/devel.txt --upgrade --find-links $PYTORCH_URL
pip install --requirement requirements/test.txt --upgrade --find-links $PYTORCH_URL
pip uninstall -y torchmetrics
pip list
shell: bash

- name: HF cache
uses: actions/cache@v2
with:
path: $TRANSFORMERS_CACHE
key: cache-transformers

- name: Test Package [only]
run: |
# NOTE: run coverage on tests does not propagate failure status for Win, https://github.com/nedbat/coveragepy/issues/1003
1 change: 1 addition & 0 deletions requirements/test.txt
@@ -3,6 +3,7 @@ codecov>=2.1
pytest>=6.0
pytest-cov>2.10
# pytest-flake8
pytest-doctestplus
check-manifest
twine>=3.2
mypy>=0.790
6 changes: 5 additions & 1 deletion setup.cfg
@@ -4,10 +4,14 @@ norecursedirs =
.github
dist
build
doctest_plus = enabled
addopts =
--strict
--doctest-modules
--color=yes
doctest_optionflags =
NORMALIZE_WHITESPACE
ELLIPSIS
# FLOAT_CMP

[coverage:run]
parallel = True
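The setup.cfg hunk above is what lets the per-example `# doctest: +NORMALIZE_WHITESPACE` directives be dropped throughout the files below: `doctest_plus = enabled` activates the plugin, and `doctest_optionflags` applies NORMALIZE_WHITESPACE and ELLIPSIS to every doctest. A small hypothetical example of what the global flags permit:

```python
def pair():
    """With NORMALIZE_WHITESPACE set globally, expected output may be
    wrapped across lines; with ELLIPSIS, "..." matches arbitrary text.

    >>> pair()
    (1,
     2)
    >>> 0.123456789
    0.123...
    """
    return (1, 2)
```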
2 changes: 2 additions & 0 deletions torchmetrics/audio/pesq.py
@@ -21,6 +21,8 @@
from torchmetrics.utilities import _future_warning
from torchmetrics.utilities.imports import _PESQ_AVAILABLE

__doctest_requires__ = {("PerceptualEvaluationSpeechQuality", "PESQ"): ["pesq"]}


class PerceptualEvaluationSpeechQuality(Metric):
"""Perceptual Evaluation of Speech Quality (PESQ)
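The `__doctest_requires__` entry above keys a tuple of names — the class and its deprecated `PESQ` alias — to the package their doctests need. Conceptually the plugin performs an importability check before collecting those doctests; a simplified sketch, not the plugin's actual implementation:

```python
import importlib.util

__doctest_requires__ = {("PerceptualEvaluationSpeechQuality", "PESQ"): ["pesq"]}


def _requirements_met(packages):
    """True if every listed package is importable (approximation of the
    check pytest-doctestplus runs for __doctest_requires__)."""
    return all(importlib.util.find_spec(pkg) is not None for pkg in packages)


for names, packages in __doctest_requires__.items():
    if not _requirements_met(packages):
        print("would skip doctests for:", ", ".join(names))
```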
2 changes: 2 additions & 0 deletions torchmetrics/audio/sdr.py
@@ -21,6 +21,8 @@
from torchmetrics.utilities import _future_warning
from torchmetrics.utilities.imports import _FAST_BSS_EVAL_AVAILABLE

__doctest_requires__ = {("SignalDistortionRatio", "SDR"): ["fast_bss_eval"]}


class SignalDistortionRatio(Metric):
r"""Signal to Distortion Ratio (SDR) [1,2,3]
2 changes: 2 additions & 0 deletions torchmetrics/audio/stoi.py
@@ -21,6 +21,8 @@
from torchmetrics.utilities import _future_warning
from torchmetrics.utilities.imports import _PYSTOI_AVAILABLE

__doctest_requires__ = {("ShortTermObjectiveIntelligibility", "STOI"): ["pystoi"]}


class ShortTermObjectiveIntelligibility(Metric):
r"""STOI (Short Term Objective Intelligibility, see [2,3]), a wrapper for the pystoi package [1].
8 changes: 4 additions & 4 deletions torchmetrics/classification/binned_precision_recall.py
@@ -94,19 +94,19 @@ class BinnedPrecisionRecallCurve(Metric):
>>> target = torch.tensor([0, 1, 3, 2])
>>> pr_curve = BinnedPrecisionRecallCurve(num_classes=5, thresholds=3)
>>> precision, recall, thresholds = pr_curve(pred, target)
>>> precision # doctest: +NORMALIZE_WHITESPACE
>>> precision
[tensor([0.2500, 1.0000, 1.0000, 1.0000]),
tensor([0.2500, 1.0000, 1.0000, 1.0000]),
tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]),
tensor([2.5000e-01, 1.0000e-06, 1.0000e+00, 1.0000e+00]),
tensor([2.5000e-07, 1.0000e+00, 1.0000e+00, 1.0000e+00])]
>>> recall # doctest: +NORMALIZE_WHITESPACE
>>> recall
[tensor([1.0000, 1.0000, 0.0000, 0.0000]),
tensor([1.0000, 1.0000, 0.0000, 0.0000]),
tensor([1.0000, 0.0000, 0.0000, 0.0000]),
tensor([1.0000, 0.0000, 0.0000, 0.0000]),
tensor([0., 0., 0., 0.])]
>>> thresholds # doctest: +NORMALIZE_WHITESPACE
>>> thresholds
[tensor([0.0000, 0.5000, 1.0000]),
tensor([0.0000, 0.5000, 1.0000]),
tensor([0.0000, 0.5000, 1.0000]),
@@ -285,7 +285,7 @@ class BinnedRecallAtFixedPrecision(BinnedPrecisionRecallCurve):
... [0.05, 0.05, 0.05, 0.75, 0.05]])
>>> target = torch.tensor([0, 1, 3, 2])
>>> average_precision = BinnedRecallAtFixedPrecision(num_classes=5, thresholds=10, min_precision=0.5)
>>> average_precision(pred, target) # doctest: +NORMALIZE_WHITESPACE
>>> average_precision(pred, target)
(tensor([1.0000, 1.0000, 0.0000, 0.0000, 0.0000]),
tensor([6.6667e-01, 6.6667e-01, 1.0000e+06, 1.0000e+06, 1.0000e+06]))
"""
2 changes: 1 addition & 1 deletion torchmetrics/classification/confusion_matrix.py
@@ -85,7 +85,7 @@ class ConfusionMatrix(Metric):
>>> target = torch.tensor([[0, 1, 0], [1, 0, 1]])
>>> preds = torch.tensor([[0, 0, 1], [1, 0, 1]])
>>> confmat = ConfusionMatrix(num_classes=3, multilabel=True)
>>> confmat(preds, target) # doctest: +NORMALIZE_WHITESPACE
>>> confmat(preds, target)
tensor([[[1, 0], [0, 1]],
[[1, 0], [1, 0]],
[[0, 1], [0, 1]]])
2 changes: 1 addition & 1 deletion torchmetrics/classification/precision_recall_curve.py
@@ -72,7 +72,7 @@ class PrecisionRecallCurve(Metric):
>>> target = torch.tensor([0, 1, 3, 2])
>>> pr_curve = PrecisionRecallCurve(num_classes=5)
>>> precision, recall, thresholds = pr_curve(pred, target)
>>> precision # doctest: +NORMALIZE_WHITESPACE
>>> precision
[tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]),
tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])]
>>> recall
8 changes: 4 additions & 4 deletions torchmetrics/classification/roc.py
@@ -80,7 +80,7 @@ class ROC(Metric):
[tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])]
>>> tpr
[tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])]
>>> thresholds # doctest: +NORMALIZE_WHITESPACE
>>> thresholds
[tensor([1.7500, 0.7500, 0.0500]),
tensor([1.7500, 0.7500, 0.0500]),
tensor([1.7500, 0.7500, 0.0500]),
@@ -94,15 +94,15 @@ class ROC(Metric):
>>> target = torch.tensor([[1, 1, 0], [0, 1, 0], [0, 0, 0], [0, 1, 1]])
>>> roc = ROC(num_classes=3, pos_label=1)
>>> fpr, tpr, thresholds = roc(pred, target)
>>> fpr # doctest: +NORMALIZE_WHITESPACE
>>> fpr
[tensor([0.0000, 0.3333, 0.3333, 0.6667, 1.0000]),
tensor([0., 0., 0., 1., 1.]),
tensor([0.0000, 0.0000, 0.3333, 0.6667, 1.0000])]
>>> tpr # doctest: +NORMALIZE_WHITESPACE
>>> tpr
[tensor([0., 0., 1., 1., 1.]),
tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000]),
tensor([0., 1., 1., 1., 1.])]
>>> thresholds # doctest: +NORMALIZE_WHITESPACE
>>> thresholds
[tensor([1.8603, 0.8603, 0.8191, 0.3584, 0.2286]),
tensor([1.7576, 0.7576, 0.3680, 0.3468, 0.0745]),
tensor([1.1837, 0.1837, 0.1338, 0.1183, 0.1138])]
43 changes: 21 additions & 22 deletions torchmetrics/detection/map.py
@@ -20,12 +20,13 @@

from torchmetrics.metric import Metric
from torchmetrics.utilities import _future_warning
from torchmetrics.utilities.imports import _TORCHVISION_AVAILABLE, _TORCHVISION_GREATER_EQUAL_0_8
from torchmetrics.utilities.imports import _TORCHVISION_GREATER_EQUAL_0_8

if _TORCHVISION_AVAILABLE and _TORCHVISION_GREATER_EQUAL_0_8:
if _TORCHVISION_GREATER_EQUAL_0_8:
from torchvision.ops import box_area, box_convert, box_iou
else:
box_convert = box_iou = box_area = None
__doctest_skip__ = ["MeanAveragePrecision", "MAP"]

log = logging.getLogger(__name__)

@@ -197,25 +198,24 @@ class MeanAveragePrecision(Metric):
... labels=torch.IntTensor([0]),
... )
... ]
>>> metric = MeanAveragePrecision() # doctest: +SKIP
>>> metric.update(preds, target) # doctest: +SKIP
>>> metric = MeanAveragePrecision()
>>> metric.update(preds, target)
>>> from pprint import pprint
>>> pprint(metric.compute()) # doctest: +SKIP
>>> pprint(metric.compute())
{'map': tensor(0.6000),
'map_50': tensor(1.),
'map_75': tensor(1.),
'map_small': tensor(-1.),
'map_medium': tensor(-1.),
'map_large': tensor(0.6000),
'map_medium': tensor(-1.),
'map_per_class': tensor(-1.),
'map_small': tensor(-1.),
'mar_1': tensor(0.6000),
'mar_10': tensor(0.6000),
'mar_100': tensor(0.6000),
'mar_small': tensor(-1.),
'mar_medium': tensor(-1.),
'mar_100_per_class': tensor(-1.),
'mar_large': tensor(0.6000),
'map_per_class': tensor(-1.),
'mar_100_per_class': tensor(-1.)
}
'mar_medium': tensor(-1.),
'mar_small': tensor(-1.)}
Raises:
ModuleNotFoundError:
@@ -761,25 +761,24 @@ class MAP(MeanAveragePrecision):
... labels=torch.IntTensor([0]),
... )
... ]
>>> metric = MAP() # doctest: +SKIP
>>> metric.update(preds, target) # doctest: +SKIP
>>> metric = MAP()
>>> metric.update(preds, target)
>>> from pprint import pprint
>>> pprint(metric.compute()) # doctest: +SKIP
>>> pprint(metric.compute())
{'map': tensor(0.6000),
'map_50': tensor(1.),
'map_75': tensor(1.),
'map_small': tensor(-1.),
'map_medium': tensor(-1.),
'map_large': tensor(0.6000),
'map_medium': tensor(-1.),
'map_per_class': tensor(-1.),
'map_small': tensor(-1.),
'mar_1': tensor(0.6000),
'mar_10': tensor(0.6000),
'mar_100': tensor(0.6000),
'mar_small': tensor(-1.),
'mar_medium': tensor(-1.),
'mar_100_per_class': tensor(-1.),
'mar_large': tensor(0.6000),
'map_per_class': tensor(-1.),
'mar_100_per_class': tensor(-1.)
}
'mar_medium': tensor(-1.),
'mar_small': tensor(-1.)}
"""

@deprecated(target=MeanAveragePrecision, deprecated_in="0.7", remove_in="0.8", stream=_future_warning)
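The substantive change in map.py (mirrored in the functional sdr.py and stoi.py below) is that `__doctest_skip__` is only assigned in the `else` branch, so the MAP doctests run wherever torchvision >= 0.8 is present and are skipped — rather than failing at import — where it is not. A simplified sketch of the pattern, with a plain import check standing in for torchmetrics' `_TORCHVISION_GREATER_EQUAL_0_8` version gate:

```python
# Conditional-skip pattern: keep the module importable without the backend.
try:
    from torchvision.ops import box_area, box_convert, box_iou  # noqa: F401
    _HAS_TORCHVISION = True
except ImportError:
    _HAS_TORCHVISION = False

if not _HAS_TORCHVISION:
    box_convert = box_iou = box_area = None  # placeholders so imports succeed
    # pytest-doctestplus sees this and skips the listed doctests
    __doctest_skip__ = ["MeanAveragePrecision", "MAP"]
```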
7 changes: 7 additions & 0 deletions torchmetrics/functional/audio/pesq.py
@@ -26,6 +26,13 @@
from torchmetrics.utilities import _future_warning
from torchmetrics.utilities.checks import _check_same_shape

__doctest_requires__ = {
(
"perceptual_evaluation_speech_quality",
"pesq",
): ["pesq"]
}


def perceptual_evaluation_speech_quality(
preds: Tensor, target: Tensor, fs: int, mode: str, keep_same_device: bool = False
1 change: 1 addition & 0 deletions torchmetrics/functional/audio/sdr.py
@@ -40,6 +40,7 @@
toeplitz_conjugate_gradient = None
compute_stats = None
_normalize = None
__doctest_skip__ = ["signal_distortion_ratio", "sdr"]

from torch import Tensor

1 change: 1 addition & 0 deletions torchmetrics/functional/audio/stoi.py
@@ -21,6 +21,7 @@
from pystoi import stoi as stoi_backend
else:
stoi_backend = None
__doctest_skip__ = ["short_term_objective_intelligibility", "stoi"]
from torch import Tensor

from torchmetrics.utilities import _future_warning
4 changes: 2 additions & 2 deletions torchmetrics/functional/classification/confusion_matrix.py
@@ -86,7 +86,7 @@ def _confusion_matrix_compute(confmat: Tensor, normalize: Optional[str] = None)
>>> target = torch.tensor([[0, 1, 0], [1, 0, 1]])
>>> preds = torch.tensor([[0, 0, 1], [1, 0, 1]])
>>> confmat = _confusion_matrix_update(preds, target, num_classes=3, multilabel=True)
>>> _confusion_matrix_compute(confmat) # doctest: +NORMALIZE_WHITESPACE
>>> _confusion_matrix_compute(confmat)
tensor([[[1, 0], [0, 1]],
[[1, 0], [1, 0]],
[[0, 1], [0, 1]]])
@@ -174,7 +174,7 @@ def confusion_matrix(
>>> target = torch.tensor([[0, 1, 0], [1, 0, 1]])
>>> preds = torch.tensor([[0, 0, 1], [1, 0, 1]])
>>> confmat = ConfusionMatrix(num_classes=3, multilabel=True)
>>> confmat(preds, target) # doctest: +NORMALIZE_WHITESPACE
>>> confmat(preds, target)
tensor([[[1, 0], [0, 1]],
[[1, 0], [1, 0]],
[[0, 1], [0, 1]]])
torchmetrics/functional/classification/precision_recall_curve.py
@@ -243,7 +243,7 @@ def _precision_recall_curve_compute(
>>> num_classes = 5
>>> preds, target, num_classes, pos_label = _precision_recall_curve_update(preds, target, num_classes)
>>> precision, recall, thresholds = _precision_recall_curve_compute(preds, target, num_classes)
>>> precision # doctest: +NORMALIZE_WHITESPACE
>>> precision
[tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]),
tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])]
>>> recall
@@ -321,7 +321,7 @@ def precision_recall_curve(
... [0.05, 0.05, 0.05, 0.75, 0.05]])
>>> target = torch.tensor([0, 1, 3, 2])
>>> precision, recall, thresholds = precision_recall_curve(pred, target, num_classes=5)
>>> precision # doctest: +NORMALIZE_WHITESPACE
>>> precision
[tensor([1., 1.]), tensor([1., 1.]), tensor([0.2500, 0.0000, 1.0000]),
tensor([0.2500, 0.0000, 1.0000]), tensor([0., 1.])]
>>> recall
8 changes: 4 additions & 4 deletions torchmetrics/functional/classification/roc.py
@@ -184,7 +184,7 @@ def _roc_compute(
[tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])]
>>> tpr
[tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])]
>>> thresholds # doctest: +NORMALIZE_WHITESPACE
>>> thresholds
[tensor([1.7500, 0.7500, 0.0500]),
tensor([1.7500, 0.7500, 0.0500]),
tensor([1.7500, 0.7500, 0.0500]),
@@ -262,7 +262,7 @@ def roc(
[tensor([0., 0., 1.]), tensor([0., 0., 1.]), tensor([0.0000, 0.3333, 1.0000]), tensor([0.0000, 0.3333, 1.0000])]
>>> tpr
[tensor([0., 1., 1.]), tensor([0., 1., 1.]), tensor([0., 0., 1.]), tensor([0., 0., 1.])]
>>> thresholds # doctest: +NORMALIZE_WHITESPACE
>>> thresholds
[tensor([1.7500, 0.7500, 0.0500]),
tensor([1.7500, 0.7500, 0.0500]),
tensor([1.7500, 0.7500, 0.0500]),
@@ -276,13 +276,13 @@ def roc(
... [0.8603, 0.0745, 0.1837]])
>>> target = torch.tensor([[1, 1, 0], [0, 1, 0], [0, 0, 0], [0, 1, 1]])
>>> fpr, tpr, thresholds = roc(pred, target, num_classes=3, pos_label=1)
>>> fpr # doctest: +NORMALIZE_WHITESPACE
>>> fpr
[tensor([0.0000, 0.3333, 0.3333, 0.6667, 1.0000]),
tensor([0., 0., 0., 1., 1.]),
tensor([0.0000, 0.0000, 0.3333, 0.6667, 1.0000])]
>>> tpr
[tensor([0., 0., 1., 1., 1.]), tensor([0.0000, 0.3333, 0.6667, 0.6667, 1.0000]), tensor([0., 1., 1., 1., 1.])]
>>> thresholds # doctest: +NORMALIZE_WHITESPACE
>>> thresholds
[tensor([1.8603, 0.8603, 0.8191, 0.3584, 0.2286]),
tensor([1.7576, 0.7576, 0.3680, 0.3468, 0.0745]),
tensor([1.1837, 0.1837, 0.1338, 0.1183, 0.1138])]
11 changes: 7 additions & 4 deletions torchmetrics/functional/text/bert.py
@@ -28,6 +28,8 @@

if _TRANSFORMERS_AUTO_AVAILABLE:
from transformers.models.auto import AutoModel, AutoTokenizer
else:
__doctest_skip__ = ["bert_score"]

if _TQDM_AVAILABLE:
import tqdm
@@ -570,10 +572,11 @@ def bert_score(
>>> from torchmetrics.functional.text.bert import bert_score
>>> preds = ["hello there", "general kenobi"]
>>> target = ["hello there", "master kenobi"]
>>> bert_score(preds, target) # doctest: +SKIP
{'precision': [0.999..., 0.996...],
'recall': [0.999..., 0.996...],
'f1': [0.999..., 0.996...]}
>>> from pprint import pprint
>>> pprint(bert_score(preds, target)) # doctest: +ELLIPSIS
{'f1': [0.999..., 0.996...],
'precision': [0.999..., 0.996...],
'recall': [0.999..., 0.996...]}
"""
if len(preds) != len(target):
raise ValueError("Number of predicted and reference sentences must be the same!")
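The bert_score doctest goes from being skipped outright to actually running under ELLIPSIS, with `pprint` guaranteeing a stable key order — the same reason the expected dicts in map.py above were re-sorted alphabetically. For illustration:

```python
from pprint import pprint

# pprint sorts dict keys, so doctest output is deterministic regardless of
# insertion order; ELLIPSIS ("0.999...") then absorbs floating-point jitter.
scores = {"recall": 0.9991, "precision": 0.9993, "f1": 0.9992}
pprint(scores)
# -> {'f1': 0.9992, 'precision': 0.9993, 'recall': 0.9991}
```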