Merge branch 'master' into half_testing2
Borda authored Mar 29, 2021
2 parents 7e186c2 + 53d5701 commit 9e21f63
Showing 35 changed files with 676 additions and 87 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -21,6 +21,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added `RetrievalMRR` metric for Information Retrieval ([#119](https://github.com/PyTorchLightning/metrics/pull/119))


- Added `RetrievalPrecision` metric for Information Retrieval ([#119](https://github.com/PyTorchLightning/metrics/pull/119))


- Added `average='micro'` as an option in AUROC for multilabel problems ([#110](https://github.com/PyTorchLightning/metrics/pull/110))


@@ -38,6 +41,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
)


- Added `BootStrapper` to easily calculate confidence intervals for metrics ([#101](https://github.com/PyTorchLightning/metrics/pull/101))


### Changed

- Changed `ExplainedVariance` from storing all preds/targets to tracking 5 statistics ([#68](https://github.com/PyTorchLightning/metrics/pull/68))
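As a usage note for the new retrieval metric above: the following is a minimal sketch of the `RetrievalPrecision` module, pieced together from the tests added in this commit; the default handling of queries without relevant documents is assumed.

import torch
from torchmetrics import RetrievalPrecision

# `indexes` assigns every document to a query: documents 0-2 belong to query 0, 3-5 to query 1.
indexes = torch.tensor([0, 0, 0, 1, 1, 1])
preds = torch.tensor([0.9, 0.3, 0.5, 0.2, 0.8, 0.4])            # predicted relevance scores
target = torch.tensor([True, False, True, False, True, False])  # ground-truth relevance

metric = RetrievalPrecision(k=2)    # precision over the top-2 documents of each query
metric(indexes, preds, target)      # query 0 -> 1.0, query 1 -> 0.5, averaged -> tensor(0.7500)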
9 changes: 8 additions & 1 deletion docs/source/references/functional.rst
@@ -248,7 +248,14 @@ retrieval_average_precision [func]


retrieval_reciprocal_rank [func]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: torchmetrics.functional.retrieval_reciprocal_rank
:noindex:


retrieval_precision [func]
~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autofunction:: torchmetrics.functional.retrieval_precision
:noindex:
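A short sketch of the functional form documented above, with the call signature taken from the tests in this commit:

import torch
from torchmetrics.functional import retrieval_precision

preds = torch.tensor([0.2, 0.9, 0.5, 0.4])          # scores for the documents of a single query
target = torch.tensor([False, True, True, False])   # relevance labels for the same documents

retrieval_precision(preds, target, k=2)  # top-2 by score are 0.9 and 0.5, both relevant -> tensor(1.)
retrieval_precision(preds, target)       # k=None considers all documents: 2 relevant out of 4 -> tensor(0.5000)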
18 changes: 18 additions & 0 deletions docs/source/references/modules.rst
@@ -334,3 +334,21 @@ RetrievalMRR

.. autoclass:: torchmetrics.RetrievalMRR
:noindex:


RetrievalPrecision
~~~~~~~~~~~~~~~~~~

.. autoclass:: torchmetrics.RetrievalPrecision
:noindex:


********
Wrappers
********

Modular wrapper metrics are not metrics in themselves, but instead take a metric and alter the internal logic
of the base metric.

.. autoclass:: torchmetrics.BootStrapper
:noindex:
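A hedged sketch of how the `BootStrapper` wrapper documented above might be used; the `num_bootstraps` argument and the dictionary returned by `compute()` are assumptions not shown in this diff.

import torch
from torchmetrics import Accuracy, BootStrapper

# Wrap a base metric; the wrapper resamples incoming batches to estimate the
# spread of the metric value (constructor argument `num_bootstraps` is assumed).
bootstrap = BootStrapper(Accuracy(), num_bootstraps=20)

preds = torch.randint(0, 2, (100,))
target = torch.randint(0, 2, (100,))
bootstrap.update(preds, target)

bootstrap.compute()  # assumed to return the bootstrapped mean/std of the accuracy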
49 changes: 49 additions & 0 deletions tests/functional/test_retrieval.py
@@ -7,7 +7,9 @@

from tests.helpers import seed_all
from tests.retrieval.test_mrr import _reciprocal_rank as reciprocal_rank
from tests.retrieval.test_precision import _precision_at_k as precision_at_k
from torchmetrics.functional.retrieval.average_precision import retrieval_average_precision
from torchmetrics.functional.retrieval.precision import retrieval_precision
from torchmetrics.functional.retrieval.reciprocal_rank import retrieval_reciprocal_rank

seed_all(1337)
@@ -42,9 +44,39 @@ def test_metrics_output_values(sklearn_metric, torch_metric, size):
assert torch.allclose(sk.float(), tm.float())


@pytest.mark.parametrize(['sklearn_metric', 'torch_metric'], [
[precision_at_k, retrieval_precision],
])
@pytest.mark.parametrize("size", [1, 4, 10])
@pytest.mark.parametrize("k", [None, 1, 4, 10])
def test_metrics_output_values_with_k(sklearn_metric, torch_metric, size, k):
""" Compare PL metrics to sklearn version. """
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# test results are computed correctly wrt std implementation
for i in range(6):
preds = np.random.randn(size)
target = np.random.randn(size) > 0

# sometimes test with integer targets
if (i % 2) == 0:
target = target.astype(np.int)

sk = torch.tensor(sklearn_metric(target, preds, k), device=device)
tm = torch_metric(torch.tensor(preds, device=device), torch.tensor(target, device=device), k)

# `torch_metric`s return 0 when no label is True
# while `sklearn` metrics returns NaN
if math.isnan(sk):
assert tm == 0
else:
assert torch.allclose(sk.float(), tm.float())


@pytest.mark.parametrize(['torch_metric'], [
[retrieval_average_precision],
[retrieval_reciprocal_rank],
[retrieval_precision]
])
def test_input_dtypes(torch_metric) -> None:
""" Check wrong input dtypes are managed correctly. """
@@ -75,6 +107,7 @@ def test_input_dtypes(torch_metric) -> None:
@pytest.mark.parametrize(['torch_metric'], [
[retrieval_average_precision],
[retrieval_reciprocal_rank],
[retrieval_precision]
])
def test_input_shapes(torch_metric) -> None:
""" Check wrong input shapes are managed correctly. """
@@ -93,3 +126,19 @@ def test_input_shapes(torch_metric) -> None:

with pytest.raises(ValueError, match="`preds` and `target` must be of the same shape"):
torch_metric(preds, target)


# test metrics using top K parameter
@pytest.mark.parametrize(['torch_metric'], [
[retrieval_precision]
])
@pytest.mark.parametrize('k', [-1, 1.0])
def test_input_params(torch_metric, k) -> None:
""" Check wrong input shapes are managed correctly. """
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# test with random tensors
preds = torch.tensor([0] * 4, device=device, dtype=torch.float)
target = torch.tensor([0] * 4, device=device, dtype=torch.int64)
with pytest.raises(ValueError, match="`k` has to be a positive integer or None"):
torch_metric(preds, target, k=k)
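The `isnan` branch in the test above exists because the torchmetrics functionals fall back to 0 for a query without any relevant document, while the sklearn-style references return NaN; a minimal illustration:

import torch
from torchmetrics.functional import retrieval_precision

preds = torch.tensor([0.4, 0.7, 0.1])
target = torch.tensor([False, False, False])  # no relevant document in this query

retrieval_precision(preds, target)  # -> tensor(0.) rather than NaN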
22 changes: 17 additions & 5 deletions tests/retrieval/helpers.py
@@ -6,12 +6,13 @@
from torch import Tensor

from tests.helpers import seed_all
from torchmetrics import Metric

seed_all(1337)


def _compute_sklearn_metric(
metric: Callable, target: List[np.ndarray], preds: List[np.ndarray], behaviour: str
metric: Callable, target: List[np.ndarray], preds: List[np.ndarray], behaviour: str, **kwargs
) -> Tensor:
""" Compute metric with multiple iterations over every query predictions set. """
sk_results = []
@@ -25,7 +26,7 @@ def _compute_sklearn_metric(
else:
sk_results.append(0.0)
else:
res = metric(b, a)
res = metric(b, a, **kwargs)
sk_results.append(res)

if len(sk_results) > 0:
@@ -34,10 +35,15 @@


def _test_retrieval_against_sklearn(
sklearn_metric, torch_metric, size, n_documents, query_without_relevant_docs_options
sklearn_metric: Callable,
torch_metric: Metric,
size: int,
n_documents: int,
query_without_relevant_docs_options: str,
**kwargs
) -> None:
""" Compare PL metrics to standard version. """
metric = torch_metric(query_without_relevant_docs=query_without_relevant_docs_options)
metric = torch_metric(query_without_relevant_docs=query_without_relevant_docs_options, **kwargs)
shape = (size, )

indexes = []
@@ -49,7 +55,7 @@ def _test_retrieval_against_sklearn(
preds.append(np.random.randn(*shape))
target.append(np.random.randn(*shape) > 0)

sk_results = _compute_sklearn_metric(sklearn_metric, target, preds, query_without_relevant_docs_options)
sk_results = _compute_sklearn_metric(sklearn_metric, target, preds, query_without_relevant_docs_options, **kwargs)
sk_results = torch.tensor(sk_results)

indexes_tensor = torch.cat([torch.tensor(i) for i in indexes]).long()
@@ -120,3 +126,9 @@ def _test_input_shapes(torchmetric) -> None:

with pytest.raises(ValueError, match="`indexes`, `preds` and `target` must be of the same shape"):
metric(indexes, preds, target)


def _test_input_args(torchmetric: Metric, message: str, **kwargs) -> None:
"""Check invalid args are managed correctly. """
with pytest.raises(ValueError, match=message):
torchmetric(**kwargs)
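For orientation, the comparison helper above feeds the sklearn-style reference one array per query, while the torchmetrics `Metric` receives flat tensors plus an `indexes` tensor carrying the query id of every document; a small sketch of the two equivalent representations (tensor names are illustrative only):

import numpy as np
import torch

# Per-query lists, as consumed by the sklearn-style reference ...
preds = [np.array([0.9, 0.3]), np.array([0.2, 0.8, 0.4])]
target = [np.array([True, False]), np.array([False, True, False])]

# ... and the flat representation consumed by the torchmetrics Metric,
# mirroring the concatenation done in `_test_retrieval_against_sklearn`.
indexes_tensor = torch.cat([torch.full((len(p),), i, dtype=torch.long) for i, p in enumerate(preds)])
preds_tensor = torch.cat([torch.tensor(p) for p in preds]).float()
target_tensor = torch.cat([torch.tensor(t) for t in target]).long()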
20 changes: 13 additions & 7 deletions tests/retrieval/test_mrr.py
@@ -1,25 +1,31 @@
import numpy as np
import pytest
from sklearn.metrics import label_ranking_average_precision_score

from tests.retrieval.helpers import _test_dtypes, _test_input_shapes, _test_retrieval_against_sklearn
from torchmetrics.retrieval.mean_reciprocal_rank import RetrievalMRR


def _reciprocal_rank(target: np.array, preds: np.array):
"""
Implementation of reciprocal rank because couldn't find a good implementation.
`sklearn.metrics.label_ranking_average_precision_score` is similar but works in a different way
then the number of positive labels is greater than 1.
Adaptation of `sklearn.metrics.label_ranking_average_precision_score`.
Since the original sklearn metric works as RR only when the number of positive
targets is exactly 1, here we remove every positive target that is not the most
important. Remember that in RR only the positive target with the highest score is considered.
"""
assert target.shape == preds.shape
assert len(target.shape) == 1 # works only with single dimension inputs

# going to remove T targets that are not ranked as highest
indexes = preds[target.astype(np.bool)]
if len(indexes) > 0:
target[preds != indexes.max(-1, keepdims=True)[0]] = 0 # ensure that only 1 positive label is present

if target.sum() > 0:
target = target[np.argsort(preds, axis=-1)][::-1]
rank = np.nonzero(target)[0][0] + 1
return 1.0 / rank
# sklearn `label_ranking_average_precision_score` requires at most 2 dims
return label_ranking_average_precision_score(np.expand_dims(target, axis=0), np.expand_dims(preds, axis=0))
else:
return np.NaN
return 0.0


@pytest.mark.parametrize('size', [1, 4, 10])
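As a quick sanity check of the reference above, the functional counterpart documented earlier behaves as follows for a query with a single relevant document:

import torch
from torchmetrics.functional import retrieval_reciprocal_rank

preds = torch.tensor([0.2, 0.5, 0.3, 0.9])
target = torch.tensor([False, True, False, False])

# The relevant document has the second-highest score, so its rank is 2 and RR = 1/2.
retrieval_reciprocal_rank(preds, target)  # -> tensor(0.5000)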
56 changes: 56 additions & 0 deletions tests/retrieval/test_precision.py
@@ -0,0 +1,56 @@
import numpy as np
import pytest

from tests.retrieval.helpers import _test_dtypes, _test_input_args, _test_input_shapes, _test_retrieval_against_sklearn
from torchmetrics.retrieval.retrieval_precision import RetrievalPrecision


def _precision_at_k(target: np.array, preds: np.array, k: int = None):
"""
Didn't find a reliable implementation of Precision in Information Retrieval, so it is
reimplemented here. A good explanation can be found ``
"""
assert target.shape == preds.shape
assert len(target.shape) == 1 # works only with single dimension inputs

if k is None:
k = len(preds)

if target.sum() > 0:
order_indexes = np.argsort(preds, axis=0)[::-1]
relevant = np.sum(target[order_indexes][:k])
return relevant * 1.0 / k
else:
return np.NaN


@pytest.mark.parametrize('size', [1, 4, 10])
@pytest.mark.parametrize('n_documents', [1, 5])
@pytest.mark.parametrize('query_without_relevant_docs_options', ['skip', 'pos', 'neg'])
@pytest.mark.parametrize('k', [None, 1, 4, 10])
def test_results(size, n_documents, query_without_relevant_docs_options, k):
""" Test metrics are computed correctly. """
_test_retrieval_against_sklearn(
_precision_at_k,
RetrievalPrecision,
size,
n_documents,
query_without_relevant_docs_options,
k=k
)


def test_dtypes():
""" Check dypes are managed correctly. """
_test_dtypes(RetrievalPrecision)


def test_input_shapes() -> None:
"""Check inputs shapes are managed correctly. """
_test_input_shapes(RetrievalPrecision)


@pytest.mark.parametrize('k', [-1, 1.0])
def test_input_params(k) -> None:
"""Check invalid args are managed correctly. """
_test_input_args(RetrievalPrecision, "`k` has to be a positive integer or None", k=k)
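For intuition, a worked call of the `_precision_at_k` reference above (plain numpy, assuming the function is in scope):

import numpy as np

preds = np.array([0.4, 0.1, 0.8, 0.3])
target = np.array([1, 0, 1, 0])

_precision_at_k(target, preds, k=2)  # top-2 scores are 0.8 and 0.4, both relevant -> 1.0
_precision_at_k(target, preds)       # k defaults to all 4 documents, 2 of them relevant -> 0.5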
Empty file added tests/wrappers/__init__.py
Empty file.