add Extended Edit Distance (EED) metric (#668)

* add Extended Edit Distance (EED) metric * flake8, mypy, and doctest * fixed weird bug where parallelized metric was giving different answers to non-parallelized metric * update CHANGELOG.md Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jirka Borovec <[email protected]> Co-authored-by: Daniel Stancl <[email protected]> Co-authored-by: Jirka <[email protected]> Co-authored-by: Nicki Skafte Detlefsen <[email protected]>
Lightning-AI · Jan 10, 2022 · 8ef281c · 8ef281c
1 parent 3bd4fb0
commit 8ef281c
Show file tree

Hide file tree

Showing 11 changed files with 721 additions and 0 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   - `SQuAD` ([#623](https://github.com/PyTorchLightning/metrics/pull/623))
   - `CHRFScore` ([#641](https://github.com/PyTorchLightning/metrics/pull/641))
   - `TranslationEditRate` ([#646](https://github.com/PyTorchLightning/metrics/pull/646))
+  - `ExtendedEditDistance` ([#668](https://github.com/PyTorchLightning/metrics/pull/668))
+
 
 - Added `MultiScaleSSIM` into image metrics ([#679](https://github.com/PyTorchLightning/metrics/pull/679))
 

diff --git a/docs/source/links.rst b/docs/source/links.rst
@@ -76,4 +76,5 @@
 .. _chrF score: https://aclanthology.org/W15-3049.pdf
 .. _chrF++ score: https://aclanthology.org/W17-4770.pdf
 .. _TER: https://aclanthology.org/2006.amta-papers.25.pdf
+.. _ExtendedEditDistance: https://aclanthology.org/W19-5359.pdf
 .. _MultiScaleSSIM: https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf
diff --git a/docs/source/references/functional.rst b/docs/source/references/functional.rst
@@ -460,6 +460,12 @@ chrf_score [func]
 .. autofunction:: torchmetrics.functional.chrf_score
     :noindex:
 
+extended_edit_distance [func]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: torchmetrics.functional.extended_edit_distance
+    :noindex:
+
 match_error_rate [func]
 ~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/docs/source/references/modules.rst b/docs/source/references/modules.rst
@@ -642,6 +642,12 @@ CHRFScore
 .. autoclass:: torchmetrics.CHRFScore
     :noindex:
 
+ExtendedEditDistance
+~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: torchmetrics.ExtendedEditDistance
+    :noindex:
+
 MatchErrorRate
 ~~~~~~~~~~~~~~
 

diff --git a/tests/text/inputs.py b/tests/text/inputs.py
@@ -63,3 +63,7 @@
 _inputs_error_rate_batch_size_1 = Input(**ERROR_RATES_BATCHES_1)
 
 _inputs_error_rate_batch_size_2 = Input(**ERROR_RATES_BATCHES_2)
+
+# single reference: each REFERENCE_* is used directly, without an extra level of nesting
+TUPLE_OF_SINGLE_REFERENCES = ((REFERENCE_1A, REFERENCE_1B), (REFERENCE_1B, REFERENCE_1C))
+_inputs_single_reference = Input(preds=TUPLE_OF_HYPOTHESES, targets=TUPLE_OF_SINGLE_REFERENCES)
diff --git a/tests/text/test_eed.py b/tests/text/test_eed.py
@@ -0,0 +1,120 @@
+# Copyright The PyTorch Lightning team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from functools import partial
+
+import pytest
+from torch import Tensor, tensor
+
+from tests.text.helpers import TextTester
+from tests.text.inputs import _inputs_single_reference, _inputs_single_sentence_multiple_references
+from torchmetrics.functional.text.eed import extended_edit_distance
+from torchmetrics.text.eed import ExtendedEditDistance
+
+
+def rwth_manual_metric(preds, targets) -> Tensor:
+    """Return hard-coded reference EED scores; ``targets`` is accepted but never read.
+
+    Per the original note, the values were obtained for the examples defined in `tests.text.inputs` with the
+    script from https://github.com/rwth-i6/ExtendedEditDistance.
+    """
+    hypothesis_a = "It is a guide to action which ensures that the military always obeys the commands of the party"
+    score_a = tensor(0.24248056001808083)
+    score_b = tensor(0.19152276295133436)
+
+    if len(preds) == 4:
+        # four predictions: report the mean of both reference scores
+        return (score_a + score_b) / 2
+    if hypothesis_a in preds:
+        # hypothesis A is among the predictions: its own score applies
+        return score_a
+    return score_b
+
+
+@pytest.mark.parametrize(["preds", "targets"], [(_inputs_single_reference.preds, _inputs_single_reference.targets)])
+class TestExtendedEditDistance(TextTester):
+    """Class, functional and differentiability checks run on the single-reference inputs."""
+
+    @pytest.mark.parametrize("ddp", [False, True])
+    @pytest.mark.parametrize("dist_sync_on_step", [False, True])
+    def test_eed_class(self, preds, targets, ddp, dist_sync_on_step):
+        """Compare the module metric with the hard-coded reference scores."""
+        self.run_class_metric_test(
+            preds=preds,
+            targets=targets,
+            ddp=ddp,
+            dist_sync_on_step=dist_sync_on_step,
+            metric_class=ExtendedEditDistance,
+            sk_metric=partial(rwth_manual_metric),
+        )
+
+    def test_eed_functional(self, preds, targets):
+        """Compare the functional metric with the hard-coded reference scores."""
+        self.run_functional_metric_test(
+            preds,
+            targets,
+            sk_metric=partial(rwth_manual_metric),
+            metric_functional=extended_edit_distance,
+        )
+
+    def test_eed_differentiability(self, preds, targets):
+        """Run the shared differentiability test for the module and functional forms."""
+        self.run_differentiability_test(
+            metric_module=ExtendedEditDistance,
+            metric_functional=extended_edit_distance,
+            preds=preds,
+            targets=targets,
+        )
+
+
+# test blank edge cases
+def test_eed_empty_functional():
+    """Check that the functional metric returns 0.0 for no hypotheses and one empty reference list."""
+    score = extended_edit_distance([], [[]])
+    assert score == tensor(0.0)
+
+
+def test_eed_empty_class():
+    """Check that the module metric returns 0.0 for no hypotheses and one empty reference list."""
+    metric = ExtendedEditDistance()
+    score = metric([], [[]])
+    assert score == tensor(0.0)
+
+
+def test_eed_empty_with_non_empty_hyp_functional():
+    """Check that the functional metric returns 0.0 when the only reference list is empty."""
+    score = extended_edit_distance(["python"], [[]])
+    assert score == tensor(0.0)
+
+
+def test_eed_empty_with_non_empty_hyp_class():
+    """Check that the module metric returns 0.0 when the only reference list is empty."""
+    metric = ExtendedEditDistance()
+    score = metric(["python"], [[]])
+    assert score == tensor(0.0)
+
+
+def test_eed_return_sentence_level_score_functional():
+    """Check that the functional metric also returns sentence-level scores as a Tensor when requested."""
+    inputs = _inputs_single_sentence_multiple_references
+    _, sentence_eed = extended_edit_distance(inputs.preds, inputs.targets, return_sentence_level_score=True)
+    assert isinstance(sentence_eed, Tensor)
+
+
+def test_eed_return_sentence_level_class():
+    """Check that the module metric also returns sentence-level scores as a Tensor when configured to."""
+    inputs = _inputs_single_sentence_multiple_references
+    metric = ExtendedEditDistance(return_sentence_level_score=True)
+    _, sentence_eed = metric(inputs.preds, inputs.targets)
+    assert isinstance(sentence_eed, Tensor)
diff --git a/torchmetrics/__init__.py b/torchmetrics/__init__.py
@@ -86,6 +86,7 @@
     BLEUScore,
     CharErrorRate,
     CHRFScore,
+    ExtendedEditDistance,
     MatchErrorRate,
     SacreBLEUScore,
     SQuAD,
@@ -115,6 +116,7 @@
     "CosineSimilarity",
     "TweedieDevianceScore",
     "ExplainedVariance",
+    "ExtendedEditDistance",
     "F1",
     "F1Score",
     "FBeta",

diff --git a/torchmetrics/functional/__init__.py b/torchmetrics/functional/__init__.py
@@ -69,6 +69,7 @@
 from torchmetrics.functional.text.bleu import bleu_score
 from torchmetrics.functional.text.cer import char_error_rate
 from torchmetrics.functional.text.chrf import chrf_score
+from torchmetrics.functional.text.eed import extended_edit_distance
 from torchmetrics.functional.text.mer import match_error_rate
 from torchmetrics.functional.text.rouge import rouge_score
 from torchmetrics.functional.text.sacre_bleu import sacre_bleu_score
@@ -93,6 +94,7 @@
     "tweedie_deviance_score",
     "dice_score",
     "explained_variance",
+    "extended_edit_distance",
     "f1",
     "f1_score",
     "fbeta",