diff --git a/CHANGELOG.md b/CHANGELOG.md
index dff44ae92..75acb7a3e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Changed
+
+- Added more information to model cards for pair classification models (`pair-classification-decomposable-attention-elmo`, `pair-classification-roberta-snli`, `pair-classification-roberta-mnli`, `pair-classification-esim`).
+
 ## [v1.2.0](https://github.com/allenai/allennlp-models/releases/tag/v1.2.0) - 2020-10-29
 
 ### Changed
diff --git a/allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json b/allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json
index bc6703d23..34d068df5 100644
--- a/allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json
+++ b/allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json
@@ -1,6 +1,72 @@
 {
     "id": "pair-classification-decomposable-attention-elmo",
     "registered_model_name": "decomposable_attention",
+    "registered_predictor_name": "textual_entailment",
     "display_name": "ELMo-based Decomposable Attention",
-    "archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz"
+    "archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz",
+    "model_details": {
+        "description": "This `Model` implements the Decomposable Attention model described in [A Decomposable
+        Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)
+        by Parikh et al., 2016, with some optional enhancements before the decomposable attention
+        actually happens. Parikh's original model allowed for computing an \"intra-sentence\" attention
+        before doing the decomposable entailment step. We generalize this to any
+        [`Seq2SeqEncoder`](../modules/seq2seq_encoders/seq2seq_encoder.md) that can be applied to
+        the premise and/or the hypothesis before computing entailment.
+
+        The basic outline of this model is to get an embedded representation of each word in the
+        premise and hypothesis, align words between the two, compare the aligned phrases, and make a
+        final entailment decision based on this aggregated comparison. Each step in this process uses
+        a feedforward network to modify the representation.
+
+        This model uses ELMo embeddings.",
+        "developed_by": "Parikh et al",
+        "contributed_by": "Dirk Groeneveld",
+        "date": "2020-04-09",
+        "version": "1",
+        "model_type": "Seq2Seq",
+        "paper": "[A Decomposable Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)",
+        "citation": "@article{Parikh2016ADA,
+        title={A Decomposable Attention Model for Natural Language Inference},
+        author={Ankur P. Parikh and Oscar T{\"a}ckstr{\"o}m and Dipanjan Das and Jakob Uszkoreit},
+        journal={ArXiv},
+        year={2016},
+        volume={abs/1606.01933}}",
+        "license": null,
+        "contact": "allennlp-contact@allenai.org",
+        "training_config": "decomposable_attention_elmo.jsonnet"
+    },
+    "intended_use": {
+        "primary_uses": null,
+        "primary_users": null,
+        "out_of_scope_use_cases": null
+    },
+    "factors": {
+        "relevant_factors": null,
+        "evaluation_factors": null
+    },
+    "metrics": {
+        "model_performance_measures": "Accuracy",
+        "decision_thresholds": null,
+        "variation_approaches": null
+    },
+    "evaluation_data": {
+        "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "training_data": {
+        "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "quantitative_analyses": {
+        "unitary_results": null,
+        "intersectional_results": null
+    },
+    "ethical_considerations": {
+        "ethical_considerations": null
+    },
+    "caveats_and_recommendations": {
+        "caveats_and_recommendations": null
+    }
 }
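The card's description walks through the attend / compare / aggregate outline of the model. For orientation, here is a minimal PyTorch sketch of that outline; the class, module names, and sizes are illustrative, not the registered `decomposable_attention` implementation:

```python
# Illustrative sketch of the attend / compare / aggregate steps described in
# the model card above. Names and dimensions are hypothetical.
import torch
import torch.nn.functional as F
from torch import nn


class DecomposableAttentionSketch(nn.Module):
    def __init__(self, dim: int, hidden: int, num_labels: int = 3):
        super().__init__()
        # Each step uses a small feedforward network, as the description says.
        self.attend = nn.Sequential(nn.Linear(dim, hidden), nn.ReLU())
        self.compare = nn.Sequential(nn.Linear(2 * dim, hidden), nn.ReLU())
        self.aggregate = nn.Linear(2 * hidden, num_labels)

    def forward(self, premise: torch.Tensor, hypothesis: torch.Tensor) -> torch.Tensor:
        # premise: (batch, p_len, dim); hypothesis: (batch, h_len, dim)
        # Attend: score every premise word against every hypothesis word.
        scores = torch.bmm(self.attend(premise), self.attend(hypothesis).transpose(1, 2))
        # Align: soft-align each sentence against the other.
        aligned_hyp = torch.bmm(F.softmax(scores, dim=2), hypothesis)
        aligned_prem = torch.bmm(F.softmax(scores, dim=1).transpose(1, 2), premise)
        # Compare: feedforward over each word paired with its aligned phrase.
        v_prem = self.compare(torch.cat([premise, aligned_hyp], dim=-1)).sum(dim=1)
        v_hyp = self.compare(torch.cat([hypothesis, aligned_prem], dim=-1)).sum(dim=1)
        # Aggregate the comparison vectors and make the entailment decision.
        return self.aggregate(torch.cat([v_prem, v_hyp], dim=-1))
```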
Parikh and Oscar T{\"a}ckstr{\"o}m and Dipanjan Das and Jakob Uszkoreit}, + journal={ArXiv}, + year={2016}, + volume={abs/1606.01933}}", + "license": null, + "contact": "allennlp-contact@allenai.org", + "training_config": "decomposable_attention_elmo.jsonnet", + }, + "intended_use": { + "primary_uses": null, + "primary_users": null, + "out_of_scope_use_cases": null + }, + "factors": { + "relevant_factors": null, + "evaluation_factors": null + }, + "metrics": { + "model_performance_measures": "Accuracy", + "decision_thresholds": null, + "variation_approaches": null + }, + "evaluation_data": { + "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set", + "motivation": null, + "preprocessing": null + }, + "training_data": { + "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set", + "motivation": null, + "preprocessing": null + }, + "quantitative_analyses": { + "unitary_results": null, + "intersectional_results": null + }, + "ethical_considerations": { + "ethical_considerations": null + }, + "caveats_and_recommendations": { + "caveats_and_recommendations": null + } } diff --git a/allennlp_models/modelcards/pair-classification-esim.json b/allennlp_models/modelcards/pair-classification-esim.json index 721e2be1a..2f89c6263 100644 --- a/allennlp_models/modelcards/pair-classification-esim.json +++ b/allennlp_models/modelcards/pair-classification-esim.json @@ -1,6 +1,59 @@ { "id": "pair-classification-esim", "registered_model_name": "esim", + "registered_predictor_name": "textual_entailment", "display_name": "Enhanced LSTM for Natural Language Inference", - "archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz" + "archive_file": "esim-elmo-2020.02.10.tar.gz", + "model_details": { + "description": "This `Model` implements the ESIM model, which is a sequential neural inference model + based on chain LSTMs.", + "developed_by": "Chen et al", + "contributed_by": "Dirk Groeneveld", + "date": "2020-04-09", + "version": "1", + "model_type": "LSTM", + "paper": "[Enhanced LSTM for Natural Language Inference](https://api.semanticscholar.org/CorpusID:34032948)", + "citation": "@inproceedings{Chen2017EnhancedLF, + title={Enhanced LSTM for Natural Language Inference}, + author={Qian Chen and Xiao-Dan Zhu and Z. 
diff --git a/allennlp_models/modelcards/pair-classification-roberta-mnli.json b/allennlp_models/modelcards/pair-classification-roberta-mnli.json
index 8c237f40c..2c18992ca 100644
--- a/allennlp_models/modelcards/pair-classification-roberta-mnli.json
+++ b/allennlp_models/modelcards/pair-classification-roberta-mnli.json
@@ -5,6 +5,58 @@
     "display_name": "RoBERTa MNLI",
     "archive_file": "mnli-roberta-2020-07-29.tar.gz",
     "model_details": {
-        "paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
+        "description": "This `Model` implements a basic text classifier. The text is embedded into a text field
+        using a RoBERTa-large model. The resulting sequence is pooled using a cls_pooler
+        `Seq2VecEncoder` and then passed to a linear classification layer, which projects
+        into the label space.",
+        "developed_by": "Liu et al",
+        "contributed_by": "Dirk Groeneveld",
+        "date": "2020-07-29",
+        "version": "1",
+        "model_type": "RoBERTa",
+        "paper": "[RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://api.semanticscholar.org/CorpusID:198953378)",
+        "citation": "@article{Liu2019RoBERTaAR,
+        title={RoBERTa: A Robustly Optimized BERT Pretraining Approach},
+        author={Y. Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and M. Lewis and Luke Zettlemoyer and Veselin Stoyanov},
+        journal={ArXiv},
+        year={2019},
+        volume={abs/1907.11692}}",
+        "license": null,
+        "contact": "allennlp-contact@allenai.org",
+        "training_config": "snli_roberta.jsonnet"
+    },
+    "intended_use": {
+        "primary_uses": null,
+        "primary_users": null,
+        "out_of_scope_use_cases": null
+    },
+    "factors": {
+        "relevant_factors": null,
+        "evaluation_factors": null
+    },
+    "metrics": {
+        "model_performance_measures": "Accuracy",
+        "decision_thresholds": null,
+        "variation_approaches": null
+    },
+    "evaluation_data": {
+        "dataset": "[Multi-genre Natural Language Inference (MultiNLI)](https://cims.nyu.edu/~sbowman/multinli/) dev set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "training_data": {
+        "dataset": "[Multi-genre Natural Language Inference (MultiNLI)](https://cims.nyu.edu/~sbowman/multinli/) train set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "quantitative_analyses": {
+        "unitary_results": null,
+        "intersectional_results": null
+    },
+    "ethical_considerations": {
+        "ethical_considerations": null
+    },
+    "caveats_and_recommendations": {
+        "caveats_and_recommendations": null
     }
 }
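The description corresponds to a `basic_classifier` setup: RoBERTa-large embeddings, a `cls_pooler` `Seq2VecEncoder`, and a linear projection to the labels. An illustrative sketch of that fragment in config form; the authoritative settings live in the referenced training config, so treat the values here as assumptions:

```python
# Hypothetical config fragment matching the card's description; consult the
# actual jsonnet under training_config/pair_classification for real settings.
model_fragment = {
    "type": "basic_classifier",
    "text_field_embedder": {
        "token_embedders": {
            "tokens": {
                "type": "pretrained_transformer",
                "model_name": "roberta-large",
            }
        }
    },
    "seq2vec_encoder": {
        "type": "cls_pooler",
        "embedding_dim": 1024,  # hidden size of roberta-large
    },
}
```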
diff --git a/allennlp_models/modelcards/pair-classification-roberta-snli.json b/allennlp_models/modelcards/pair-classification-roberta-snli.json
index 52a452465..8c340c39b 100644
--- a/allennlp_models/modelcards/pair-classification-roberta-snli.json
+++ b/allennlp_models/modelcards/pair-classification-roberta-snli.json
@@ -5,6 +5,58 @@
     "display_name": "RoBERTa SNLI",
     "archive_file": "snli-roberta-2020-07-29.tar.gz",
     "model_details": {
-        "paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
+        "description": "This `Model` implements a basic text classifier. The text is embedded into a text field
+        using a RoBERTa-large model. The resulting sequence is pooled using a cls_pooler
+        `Seq2VecEncoder` and then passed to a linear classification layer, which projects
+        into the label space.",
+        "developed_by": "Liu et al",
+        "contributed_by": "Dirk Groeneveld",
+        "date": "2020-07-29",
+        "version": "1",
+        "model_type": "RoBERTa",
+        "paper": "[RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://api.semanticscholar.org/CorpusID:198953378)",
+        "citation": "@article{Liu2019RoBERTaAR,
+        title={RoBERTa: A Robustly Optimized BERT Pretraining Approach},
+        author={Y. Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and M. Lewis and Luke Zettlemoyer and Veselin Stoyanov},
+        journal={ArXiv},
+        year={2019},
+        volume={abs/1907.11692}}",
+        "license": null,
+        "contact": "allennlp-contact@allenai.org",
+        "training_config": "snli_roberta.jsonnet"
+    },
+    "intended_use": {
+        "primary_uses": null,
+        "primary_users": null,
+        "out_of_scope_use_cases": null
+    },
+    "factors": {
+        "relevant_factors": null,
+        "evaluation_factors": null
+    },
+    "metrics": {
+        "model_performance_measures": "Accuracy",
+        "decision_thresholds": null,
+        "variation_approaches": null
+    },
+    "evaluation_data": {
+        "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "training_data": {
+        "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "quantitative_analyses": {
+        "unitary_results": null,
+        "intersectional_results": null
+    },
+    "ethical_considerations": {
+        "ethical_considerations": null
+    },
+    "caveats_and_recommendations": {
+        "caveats_and_recommendations": null
     }
 }
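Because each card carries an `id`, models can also be looked up through the pretrained-model helpers rather than by raw archive URL. A sketch, assuming a version of `allennlp-models` that ships `load_predictor` (availability and output keys are assumptions to verify against your installed version):

```python
# Hypothetical lookup of a card by its `id` field, then one example prediction.
from allennlp_models.pretrained import load_predictor  # assumed available

predictor = load_predictor("pair-classification-roberta-snli")
result = predictor.predict(
    premise="A dog runs through the snow.",
    hypothesis="An animal is outside.",
)
print(result["label"])  # e.g. "entailment"; key names depend on the model's output dict
```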
diff --git a/allennlp_models/pair_classification/models/decomposable_attention.py b/allennlp_models/pair_classification/models/decomposable_attention.py
index 66445a978..beb391d80 100644
--- a/allennlp_models/pair_classification/models/decomposable_attention.py
+++ b/allennlp_models/pair_classification/models/decomposable_attention.py
@@ -17,8 +17,7 @@ class DecomposableAttention(Model):
     """
     This `Model` implements the Decomposable Attention model described in [A Decomposable
-    Attention Model for Natural Language Inference](
-    https://www.semanticscholar.org/paper/A-Decomposable-Attention-Model-for-Natural-Languag-Parikh-T%C3%A4ckstr%C3%B6m/07a9478e87a8304fc3267fa16e83e9f3bbd98b27)
+    Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)
     by Parikh et al., 2016, with some optional enhancements before the decomposable attention
     actually happens. Parikh's original model allowed for computing an "intra-sentence" attention
     before doing the decomposable entailment step. We generalize this to any
     [`Seq2SeqEncoder`](../modules/seq2seq_encoders/seq2seq_encoder.md) that can be applied to
diff --git a/allennlp_models/pair_classification/models/esim.py b/allennlp_models/pair_classification/models/esim.py
index d76ebdf7f..c2371c171 100644
--- a/allennlp_models/pair_classification/models/esim.py
+++ b/allennlp_models/pair_classification/models/esim.py
@@ -22,8 +22,7 @@ class ESIM(Model):
     """
     This `Model` implements the ESIM sequence model described in [Enhanced LSTM for Natural
     Language Inference]
-    (https://www.semanticscholar.org/paper/Enhanced-LSTM-for-Natural-Language-Inference-Chen-Zhu/83e7654d545fbbaaf2328df365a781fb67b841b4)
-    by Chen et al., 2017.
+    (https://api.semanticscholar.org/CorpusID:34032948) by Chen et al., 2017.
 
     Registered as a `Model` with name "esim".
 