Remove the xnli demos from the LIT examples.
PiperOrigin-RevId: 640597959
llcourage authored and LIT team committed Jun 5, 2024
1 parent c2fb41b commit dd196e9
Showing 4 changed files with 0 additions and 249 deletions.
130 changes: 0 additions & 130 deletions lit_nlp/examples/datasets/classification.py
@@ -1,10 +1,8 @@
"""Text classification datasets, including single- and two-sentence tasks."""
from typing import Optional

from absl import logging
from lit_nlp.api import dataset as lit_dataset
from lit_nlp.api import types as lit_types
import pandas as pd
import tensorflow_datasets as tfds


@@ -17,134 +15,6 @@ def load_tfds(*args, **kw):
      tfds.as_numpy(tfds.load(*args, download=True, try_gcs=True, **kw)))


class MNLIDataFromTSV(lit_dataset.Dataset):
  """MultiNLI dataset, from TSV.

  Compared to the TFDS version, this includes:
  - label2 field for binary labels, with same schema as HANS
  - genre labels, for stratified analysis

  The downside is that you need to download the data from
  https://gluebenchmark.com/tasks, and provide a path to the .tsv file.
  """

  LABELS3 = ["entailment", "neutral", "contradiction"]
  LABELS2 = ["non-entailment", "entailment"]

  def binarize_label(self, label):
    return "entailment" if label == "entailment" else "non-entailment"

  def __init__(self, path: str):
    self._examples = self.load_datapoints(path)

  def load_datapoints(self, path: str):
    with open(path) as fd:
      df = pd.read_csv(fd, sep="\t")
    # pylint: disable=g-complex-comprehension
    return [{
        "premise": row["sentence1"],
        "hypothesis": row["sentence2"],
        "label": row["gold_label"],
        "label2": self.binarize_label(row["gold_label"]),
        "genre": row["genre"],
    } for _, row in df.iterrows()]
    # pylint: enable=g-complex-comprehension

  def load(self, path: str):
    datapoints = self.load_datapoints(path)
    return lit_dataset.Dataset(base=self, examples=datapoints)

  def save(self, examples: list[lit_types.IndexedInput], path: str):
    example_data = [ex["data"] for ex in examples]
    df = pd.DataFrame(example_data).rename(columns={
        "premise": "sentence1",
        "hypothesis": "sentence2",
        "label": "gold_label",
    })
    with open(path, "w") as fd:
      df.to_csv(fd, sep="\t")

  def spec(self) -> lit_types.Spec:
    """Should match MnliModel's input_spec()."""
    return {
        "premise": lit_types.TextSegment(),
        "hypothesis": lit_types.TextSegment(),
        # 'label' for 3-way NLI labels, 'label2' for binarized.
        "label": lit_types.CategoryLabel(vocab=self.LABELS3),
        "label2": lit_types.CategoryLabel(vocab=self.LABELS2),
        "genre": lit_types.CategoryLabel(),
    }
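# Usage sketch (illustrative; not part of the original file). Assumes a
# GLUE-format MultiNLI .tsv downloaded from https://gluebenchmark.com/tasks;
# the path below is hypothetical.
#
#   mnli = MNLIDataFromTSV("/path/to/MNLI/dev_matched.tsv")
#   print(len(mnli.examples))
#   print(mnli.examples[0]["label"], mnli.examples[0]["label2"])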


class XNLIData(lit_dataset.Dataset):
  """Cross-lingual NLI; see https://cims.nyu.edu/~sbowman/xnli/."""

  LABELS = ["entailment", "neutral", "contradiction"]

  def _process_example(self, ex, languages: list[str]):
    # Hypothesis is stored as parallel arrays, so make a map.
    hyp_map = {
        lang.decode("utf-8"): hyp.decode("utf-8") for lang, hyp in zip(
            ex["hypothesis"]["language"], ex["hypothesis"]["translation"])
    }
    for lang in languages:
      if lang not in hyp_map:
        logging.warning("Missing hypothesis (lang=%s) for premise '%s'", lang,
                        ex["premise"][lang].decode("utf-8"))
        continue
      yield {
          "premise": ex["premise"][lang].decode("utf-8"),
          "hypothesis": hyp_map[lang],
          "label": self.LABELS[ex["label"]],
          "language": lang,
      }

  def __init__(self, split: str, languages=("en", "es", "hi", "zh")):
    self._examples = []
    for ex in load_tfds("xnli", split=split):
      # Each TFDS example contains all the translations; we unpack to
      # individual (premise, hypothesis) pairs that are compatible with a
      # standard NLI model.
      self._examples.extend(self._process_example(ex, languages))

  def spec(self):
    return {
        "premise": lit_types.TextSegment(),
        "hypothesis": lit_types.TextSegment(),
        "label": lit_types.CategoryLabel(vocab=self.LABELS),
        "language": lit_types.CategoryLabel(),
    }
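# Usage sketch (illustrative; not part of the original file). Loads the XNLI
# validation split from TFDS for two of the default languages:
#
#   xnli = XNLIData(split="validation", languages=("en", "hi"))
#   ex = xnli.examples[0]
#   print(ex["language"], ex["premise"], "->", ex["label"])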


class HansNLIData(lit_dataset.Dataset):
  """HANS NLI challenge set (https://arxiv.org/abs/1902.01007); 30k examples."""

  LABELS = ["non-entailment", "entailment"]

  def __init__(self, path: str):
    with open(path) as fd:
      df = pd.read_csv(fd, sep="\t", header=0)
    # pylint: disable=g-complex-comprehension
    self._examples = [{
        "premise": row["sentence1"],
        "hypothesis": row["sentence2"],
        "label2": row["gold_label"],
        "heuristic": row["heuristic"],
        "template": row["template"],
    } for _, row in df.iterrows()]
    # pylint: enable=g-complex-comprehension

  def spec(self) -> lit_types.Spec:
    return {
        "premise": lit_types.TextSegment(),
        "hypothesis": lit_types.TextSegment(),
        # 'label2' for 2-way NLI labels
        "label2": lit_types.CategoryLabel(vocab=self.LABELS),
        "heuristic": lit_types.CategoryLabel(),
        "template": lit_types.CategoryLabel(),
    }
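# Usage sketch (illustrative; not part of the original file). HANS ships as a
# tab-separated evaluation file; the filename below is hypothetical.
#
#   hans = HansNLIData("/path/to/heuristics_evaluation_set.txt")
#   ex = hans.examples[0]
#   print(ex["heuristic"], ex["template"], ex["label2"])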


class IMDBData(lit_dataset.Dataset):
"""IMDB reviews dataset; see http://ai.stanford.edu/~amaas/data/sentiment/."""

103 changes: 0 additions & 103 deletions lit_nlp/examples/xnli_demo.py

This file was deleted.

13 changes: 0 additions & 13 deletions website/sphinx_src/demos.md
@@ -43,19 +43,6 @@ https://pair-code.github.io/lit/demos/.
Tip: check out a case study for this demo on the public LIT website:
https://pair-code.github.io/lit/tutorials/sentiment

### Multilingual (XNLI) <!-- DO NOT REMOVE {#xnli .demo-header} -->

**Code:** [examples/xnli_demo.py](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/xnli_demo.py)

*   The [XNLI](https://cims.nyu.edu/~sbowman/xnli/) dataset translates a subset
    of MultiNLI into 14 different languages.
*   Specify the `--languages=en,es,hi,...` flag to select which languages to
    load (see the example invocation below).
* NLI as a three-way classification task with two-segment input (premise,
hypothesis).
* Fine-tuned multilingual BERT model.
* Salience methods work with non-whitespace-delimited text, by using the
model's wordpiece tokenization.
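
For reference, a sketch of how this demo was typically launched before its
removal (the module path matches the deleted file; the language list and port
value are illustrative):

```sh
python -m lit_nlp.examples.xnli_demo --languages=en,es,hi --port=5432
```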

--------------------------------------------------------------------------------

## Regression / Scoring <!-- DO NOT REMOVE {#regression-scoring .demo-section-header} -->
3 changes: 0 additions & 3 deletions website/sphinx_src/faq.md
@@ -34,9 +34,6 @@ All strings in LIT are unicode and most components use model-provided
tokenization if available, so in most cases non-English languages and non-Latin
scripts should work without any modifications. For examples, see:

* [XNLI demo](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/xnli_demo.py) -
cross-lingual NLI, with up to 15 languages supported via a multilingual BERT
model.
* [T5 demo](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/t5_demo.py) -
includes WMT data for machine translation

