diff --git a/lit_nlp/examples/datasets/classification.py b/lit_nlp/examples/datasets/classification.py
index 0a022f1c..0d1e1df3 100644
--- a/lit_nlp/examples/datasets/classification.py
+++ b/lit_nlp/examples/datasets/classification.py
@@ -1,10 +1,8 @@
 """Text classification datasets, including single- and two-sentence tasks."""
 from typing import Optional
 
-from absl import logging
 from lit_nlp.api import dataset as lit_dataset
 from lit_nlp.api import types as lit_types
-import pandas as pd
 import tensorflow_datasets as tfds
 
 
@@ -17,134 +15,6 @@ def load_tfds(*args, **kw):
       tfds.as_numpy(tfds.load(*args, download=True, try_gcs=True, **kw)))
 
 
-class MNLIDataFromTSV(lit_dataset.Dataset):
-  """MultiNLI dataset, from TSV.
-
-  Compared to the TFDS version, this includes:
-  - label2 field for binary labels, with same schema as HANS
-  - genre labels, for stratified analysis
-
-  The downside is that you need to download the data from
-  https://gluebenchmark.com/tasks, and provide a path to the .tsv file.
-  """
-
-  LABELS3 = ["entailment", "neutral", "contradiction"]
-  LABELS2 = ["non-entailment", "entailment"]
-
-  def binarize_label(self, label):
-    return "entailment" if label == "entailment" else "non-entailment"
-
-  def __init__(self, path: str):
-    self._examples = self.load_datapoints(path)
-
-  def load_datapoints(self, path: str):
-    with open(path) as fd:
-      df = pd.read_csv(fd, sep="\t")
-    # pylint: disable=g-complex-comprehension
-    return [{
-        "premise": row["sentence1"],
-        "hypothesis": row["sentence2"],
-        "label": row["gold_label"],
-        "label2": self.binarize_label(row["gold_label"]),
-        "genre": row["genre"],
-    } for _, row in df.iterrows()]
-    # pylint: enable=g-complex-comprehension
-
-  def load(self, path: str):
-    datapoints = self.load_datapoints(path)
-    return lit_dataset.Dataset(base=self, examples=datapoints)
-
-  def save(self, examples: list[lit_types.IndexedInput], path: str):
-    example_data = [ex["data"] for ex in examples]
-    df = pd.DataFrame(example_data).rename(columns={
-        "premise": "sentence1",
-        "hypothesis": "sentence2",
-        "label": "gold_label",
-    })
-    with open(path, "w") as fd:
-      df.to_csv(fd, sep="\t")
-
-  def spec(self) -> lit_types.Spec:
-    """Should match MnliModel's input_spec()."""
-    return {
-        "premise": lit_types.TextSegment(),
-        "hypothesis": lit_types.TextSegment(),
-        # 'label' for 3-way NLI labels, 'label2' for binarized.
-        "label": lit_types.CategoryLabel(vocab=self.LABELS3),
-        "label2": lit_types.CategoryLabel(vocab=self.LABELS2),
-        "genre": lit_types.CategoryLabel(),
-    }
-
-
-class XNLIData(lit_dataset.Dataset):
-  """Cross-lingual NLI; see https://cims.nyu.edu/~sbowman/xnli/."""
-
-  LABELS = ["entailment", "neutral", "contradiction"]
-
-  def _process_example(self, ex, languages: list[str]):
-    # Hypothesis is stored as parallel arrays, so make a map.
-    hyp_map = {
-        lang.decode("utf-8"): hyp.decode("utf-8") for lang, hyp in zip(
-            ex["hypothesis"]["language"], ex["hypothesis"]["translation"])
-    }
-    for lang in languages:
-      if lang not in hyp_map:
-        logging.warning("Missing hypothesis (lang=%s) for premise '%s'", lang,
-                        ex["premise"]["lang"].decode("utf-8"))
-        continue
-      yield {
-          "premise": ex["premise"][lang].decode("utf-8"),
-          "hypothesis": hyp_map[lang],
-          "label": self.LABELS[ex["label"]],
-          "language": lang,
-      }
-
-  def __init__(self, split: str, languages=("en", "es", "hi", "zh")):
-    self._examples = []
-    for ex in load_tfds("xnli", split=split):
-      # Each TFDS example contains all the translations; we unpack to individual
-      # (premise, hypothesis) pairs that are compatible with a standard NLI
-      # model.
-      self._examples.extend(self._process_example(ex, languages))
-
-  def spec(self):
-    return {
-        "premise": lit_types.TextSegment(),
-        "hypothesis": lit_types.TextSegment(),
-        "label": lit_types.CategoryLabel(vocab=self.LABELS),
-        "language": lit_types.CategoryLabel(),
-    }
-
-
-class HansNLIData(lit_dataset.Dataset):
-  """HANS NLI challenge set (https://arxiv.org/abs/1902.01007); 30k examples."""
-
-  LABELS = ["non-entailment", "entailment"]
-
-  def __init__(self, path: str):
-    with open(path) as fd:
-      df = pd.read_csv(fd, sep="\t", header=0)
-    # pylint: disable=g-complex-comprehension
-    self._examples = [{
-        "premise": row["sentence1"],
-        "hypothesis": row["sentence2"],
-        "label2": row["gold_label"],
-        "heuristic": row["heuristic"],
-        "template": row["template"],
-    } for _, row in df.iterrows()]
-    # pylint: enable=g-complex-comprehension
-
-  def spec(self) -> lit_types.Spec:
-    return {
-        "premise": lit_types.TextSegment(),
-        "hypothesis": lit_types.TextSegment(),
-        # 'label2' for 2-way NLI labels
-        "label2": lit_types.CategoryLabel(vocab=self.LABELS),
-        "heuristic": lit_types.CategoryLabel(),
-        "template": lit_types.CategoryLabel(),
-    }
-
-
 class IMDBData(lit_dataset.Dataset):
   """IMDB reviews dataset; see http://ai.stanford.edu/~amaas/data/sentiment/."""
 
diff --git a/lit_nlp/examples/xnli_demo.py b/lit_nlp/examples/xnli_demo.py
deleted file mode 100644
index 4f209e39..00000000
--- a/lit_nlp/examples/xnli_demo.py
+++ /dev/null
@@ -1,103 +0,0 @@
-r"""Example demo for multilingual NLI on the XNLI eval set.
-
-To run locally with our trained model:
-  python -m lit_nlp.examples.xnli_demo --port=5432
-
-Then navigate to localhost:5432 to access the demo UI.
-
-To train a model for this task, use tools/glue_trainer.py or your favorite
-trainer script to fine-tune a multilingual encoder, such as
-bert-base-multilingual-cased, on the mnli task.
-
-Note: the LIT UI can handle around 10k examples comfortably, depending on your
-hardware. The monolingual (english) eval sets for MNLI are about 9.8k each,
-while each language for XNLI is about 2.5k examples, so we recommend using the
---languages flag to load only the languages you're interested in.
-""" - -from collections.abc import Sequence -import sys -from typing import Optional - -from absl import app -from absl import flags -from absl import logging - -from lit_nlp import dev_server -from lit_nlp import server_flags -from lit_nlp.examples.datasets import classification -from lit_nlp.examples.datasets import glue -from lit_nlp.examples.models import glue_models -from lit_nlp.lib import file_cache - -# NOTE: additional flags defined in server_flags.py - -FLAGS = flags.FLAGS - -FLAGS.set_default("development_demo", True) - -_LANGUAGES = flags.DEFINE_list( - "languages", ["en", "es", "hi", "zh"], - "Languages to load from XNLI. Available languages: " - "ar,bg,de,el,en,es,fr,hi,ru,sw,th,tr,ur,zh,vi" -) - -_MODEL_PATH = flags.DEFINE_string( - "model_path", - "https://storage.googleapis.com/what-if-tool-resources/lit-models/mbert_mnli.tar.gz", - ( - "Path to fine-tuned model files. Expects model to be in standard " - "transformers format, e.g. as saved by model.save_pretrained() and " - "tokenizer.save_pretrained()." - ), -) - -_MAX_EXAMPLES = flags.DEFINE_integer( - "max_examples", None, "Maximum number of examples to load into LIT. " - "Note: MNLI eval set is 10k examples, so will take a while to run and may " - "be slow on older machines. Set --max_examples=200 for a quick start.") - - -def get_wsgi_app() -> Optional[dev_server.LitServerType]: - """Returns a LitApp instance for consumption by gunicorn.""" - FLAGS.set_default("server_type", "external") - FLAGS.set_default("demo_mode", True) - # Parse flags without calling app.run(main), to avoid conflict with - # gunicorn command line flags. - unused = flags.FLAGS(sys.argv, known_only=True) - if unused: - logging.info("xnli_demo:get_wsgi_app() called with unused args: %s", unused) - return main([]) - - -def main(argv: Sequence[str]) -> Optional[dev_server.LitServerType]: - if len(argv) > 1: - raise app.UsageError("Too many command-line arguments.") - - # Normally path is a directory; if it's an archive file, download and - # extract to the transformers cache. - model_path = _MODEL_PATH.value - if model_path.endswith(".tar.gz"): - model_path = file_cache.cached_path( - model_path, extract_compressed_file=True) - - models = {"nli": glue_models.MNLIModel(model_path, inference_batch_size=16)} - datasets = { - "xnli": classification.XNLIData("validation", _LANGUAGES.value), - "mnli_dev": glue.MNLIData("validation_matched"), - "mnli_dev_mm": glue.MNLIData("validation_mismatched"), - } - - # Truncate datasets if --max_examples is set. - for name in datasets: - logging.info("Dataset: '%s' with %d examples", name, len(datasets[name])) - datasets[name] = datasets[name].slice[:_MAX_EXAMPLES.value] - logging.info(" truncated to %d examples", len(datasets[name])) - - # Start the LIT server. See server_flags.py for server options. - lit_demo = dev_server.Server(models, datasets, **server_flags.get_flags()) - return lit_demo.serve() - - -if __name__ == "__main__": - app.run(main) diff --git a/website/sphinx_src/demos.md b/website/sphinx_src/demos.md index c919790d..336d2b43 100644 --- a/website/sphinx_src/demos.md +++ b/website/sphinx_src/demos.md @@ -43,19 +43,6 @@ https://pair-code.github.io/lit/demos/. 
 Tip: check out a case study for this demo on the public LIT website:
 https://pair-code.github.io/lit/tutorials/sentiment
 
-### Multilingual (XNLI)
-
-**Code:** [examples/xnli_demo.py](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/xnli_demo.py)
-
-*   [XNLI](https://cims.nyu.edu/~sbowman/xnli/) dataset translates a subset of
-    MultiNLI into 14 different languages.
-*   Specify `--languages=en,jp,hi,...` flag to select which languages to load.
-*   NLI as a three-way classification task with two-segment input (premise,
-    hypothesis).
-*   Fine-tuned multilingual BERT model.
-*   Salience methods work with non-whitespace-delimited text, by using the
-    model's wordpiece tokenization.
-
 --------------------------------------------------------------------------------
 
 ## Regression / Scoring
diff --git a/website/sphinx_src/faq.md b/website/sphinx_src/faq.md
index 9ff5b3d5..fc70e29c 100644
--- a/website/sphinx_src/faq.md
+++ b/website/sphinx_src/faq.md
@@ -34,9 +34,6 @@ All strings in LIT are unicode and most components use
 model-provided tokenization if available, so in most cases non-English
 languages and non-Latin scripts should work without any modifications. For
 examples, see:
 
-*   [XNLI demo](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/xnli_demo.py) -
-    cross-lingual NLI, with up to 15 languages supported via a multilingual BERT
-    model.
 *   [T5 demo](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/t5_demo.py) -
     includes WMT data for machine translation
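
For anyone who depended on the removed XNLIData class: the underlying data is
still available from TFDS. Below is a minimal sketch of the same loading
logic, reconstructed from the deleted code above. The helper name load_xnli is
hypothetical (it is not a LIT or TFDS API), and the "xnli" feature layout it
assumes (premise keyed by language code, hypothesis stored as parallel
language/translation arrays) should be verified against the installed
tensorflow_datasets version.

    import tensorflow_datasets as tfds

    LABELS = ["entailment", "neutral", "contradiction"]

    def load_xnli(split="validation", languages=("en", "es", "hi", "zh")):
      """Yields one (premise, hypothesis) example per requested language."""
      for ex in tfds.as_numpy(tfds.load("xnli", split=split, download=True,
                                        try_gcs=True)):
        # Hypotheses are stored as parallel arrays; build language -> text map.
        hyp_map = {
            lang.decode("utf-8"): hyp.decode("utf-8")
            for lang, hyp in zip(ex["hypothesis"]["language"],
                                 ex["hypothesis"]["translation"])
        }
        for lang in languages:
          if lang not in hyp_map:
            continue  # Skip languages missing from this example.
          yield {
              "premise": ex["premise"][lang].decode("utf-8"),
              "hypothesis": hyp_map[lang],
              "label": LABELS[ex["label"]],
              "language": lang,
          }

Per the removed demo docstring, each XNLI language is roughly 2.5k validation
examples, so list(load_xnli()) with the default four languages materializes
about 10k dicts.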