This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Updating pair classification model cards (#160)
* updating pair classification model cards

* Update allennlp_models/modelcards/pair-classification-esim.json

Co-authored-by: Evan Pete Walsh <[email protected]>

Co-authored-by: Evan Pete Walsh <[email protected]>
AkshitaB and epwalsh authored Nov 3, 2020
1 parent c6006da commit b152d82
Showing 7 changed files with 233 additions and 8 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Changed

- Added more information to model cards for pair classification models (`pair-classification-decomposable-attention-elmo`, `pair-classification-roberta-snli`, `pair-classification-roberta-mnli`, `pair-classification-esim`).

## [v1.2.0](https://github.com/allenai/allennlp-models/releases/tag/v1.2.0) - 2020-10-29

### Changed
allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json
@@ -1,6 +1,72 @@
{
"id": "pair-classification-decomposable-attention-elmo",
"registered_model_name": "decomposable_attention",
"registered_predictor_name": "textual_entailment",
"display_name": "ELMo-based Decomposable Attention",
"archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz"
"archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz",
"model_details": {
"description": "This `Model` implements the Decomposable Attention model described in [A Decomposable
Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)
by Parikh et al., 2016, with some optional enhancements before the decomposable attention
actually happens. Parikh's original model allowed for computing an \"intra-sentence\" attention
before doing the decomposable entailment step. We generalize this to any
[`Seq2SeqEncoder`](../modules/seq2seq_encoders/seq2seq_encoder.md) that can be applied to
the premise and/or the hypothesis before computing entailment.

The basic outline of this model is to get an embedded representation of each word in the
premise and hypothesis, align words between the two, compare the aligned phrases, and make a
final entailment decision based on this aggregated comparison. Each step in this process uses
a feedforward network to modify the representation.

This model uses ELMo embeddings.",
"developed_by": "Parikh et al",
"contributed_by": "Dirk Groeneveld",
"date": "2020-04-09",
"version": "1",
"model_type": "Seq2Seq",
"paper": "[A Decomposable Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)",
"citation": "@article{Parikh2016ADA,
title={A Decomposable Attention Model for Natural Language Inference},
author={Ankur P. Parikh and Oscar T{\"a}ckstr{\"o}m and Dipanjan Das and Jakob Uszkoreit},
journal={ArXiv},
year={2016},
volume={abs/1606.01933}}",
"license": null,
"contact": "[email protected]",
"training_config": "decomposable_attention_elmo.jsonnet",
},
"intended_use": {
"primary_uses": null,
"primary_users": null,
"out_of_scope_use_cases": null
},
"factors": {
"relevant_factors": null,
"evaluation_factors": null
},
"metrics": {
"model_performance_measures": "Accuracy",
"decision_thresholds": null,
"variation_approaches": null
},
"evaluation_data": {
"dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set",
"motivation": null,
"preprocessing": null
},
"training_data": {
"dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set",
"motivation": null,
"preprocessing": null
},
"quantitative_analyses": {
"unitary_results": null,
"intersectional_results": null
},
"ethical_considerations": {
"ethical_considerations": null
},
"caveats_and_recommendations": {
"caveats_and_recommendations": null
}
}
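The card above now documents what the ELMo-based decomposable attention model does, but not how to call it. As a quick illustration (not part of this commit), the archive listed under `archive_file` can be loaded through the `textual_entailment` predictor; the storage URL below is an assumption based on where AllenNLP's public model archives are usually hosted.

```python
# Illustrative sketch only, not part of this diff. Assumes allennlp and
# allennlp-models are installed, and that the archive named in the card lives
# in the usual allennlp-public-models bucket (an assumption).
from allennlp.predictors.predictor import Predictor
import allennlp_models.pair_classification  # noqa: F401 -- registers the model and predictor

predictor = Predictor.from_path(
    "https://storage.googleapis.com/allennlp-public-models/decomposable-attention-elmo-2020.04.09.tar.gz",
    predictor_name="textual_entailment",
)
result = predictor.predict(
    premise="Two women are wandering along the shore drinking iced tea.",
    hypothesis="Two women are sitting on a blanket near some rocks talking about politics.",
)
print(result["label_probs"])  # probabilities over the entailment labels
```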
55 changes: 54 additions & 1 deletion allennlp_models/modelcards/pair-classification-esim.json
@@ -1,6 +1,59 @@
{
"id": "pair-classification-esim",
"registered_model_name": "esim",
"registered_predictor_name": "textual_entailment",
"display_name": "Enhanced LSTM for Natural Language Inference",
"archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz"
"archive_file": "esim-elmo-2020.02.10.tar.gz",
"model_details": {
"description": "This `Model` implements the ESIM model, which is a sequential neural inference model
based on chain LSTMs.",
"developed_by": "Chen et al",
"contributed_by": "Dirk Groeneveld",
"date": "2020-04-09",
"version": "1",
"model_type": "LSTM",
"paper": "[Enhanced LSTM for Natural Language Inference](https://api.semanticscholar.org/CorpusID:34032948)",
"citation": "@inproceedings{Chen2017EnhancedLF,
title={Enhanced LSTM for Natural Language Inference},
author={Qian Chen and Xiao-Dan Zhu and Z. Ling and Si Wei and Hui Jiang and Diana Inkpen},
booktitle={ACL},
year={2017}}",
"license": null,
"contact": "[email protected]",
"training_config": "esim.jsonnet",
},
"intended_use": {
"primary_uses": null,
"primary_users": null,
"out_of_scope_use_cases": null
},
"factors": {
"relevant_factors": null,
"evaluation_factors": null
},
"metrics": {
"model_performance_measures": "Accuracy",
"decision_thresholds": null,
"variation_approaches": null
},
"evaluation_data": {
"dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set",
"motivation": null,
"preprocessing": null
},
"training_data": {
"dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set",
"motivation": null,
"preprocessing": null
},
"quantitative_analyses": {
"unitary_results": null,
"intersectional_results": null
},
"ethical_considerations": {
"ethical_considerations": null
},
"caveats_and_recommendations": {
"caveats_and_recommendations": null
}
}
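For this card the `id` alone is enough to load the model through the convenience loader in `allennlp_models.pretrained` — a minimal sketch, assuming that helper (which resolves a card's id to its archive) is available in this release.

```python
# Sketch, assuming allennlp_models.pretrained.load_predictor exists in this
# release and resolves the card id "pair-classification-esim" to its archive.
from allennlp_models.pretrained import load_predictor

predictor = load_predictor("pair-classification-esim")
result = predictor.predict(
    premise="A man inspects the uniform of a figure in some East Asian country.",
    hypothesis="The man is sleeping.",
)
print(result["label_probs"])  # ESIM also reports per-label probabilities
```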
54 changes: 53 additions & 1 deletion allennlp_models/modelcards/pair-classification-roberta-mnli.json
@@ -5,6 +5,58 @@
"display_name": "RoBERTa MNLI",
"archive_file": "mnli-roberta-2020-07-29.tar.gz",
"model_details": {
"paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
"description": "This `Model` implements a basic text classifier. The text is embedded into a text field
using a RoBERTa-large model. The resulting sequence is pooled using a cls_pooler
`Seq2VecEncoder` and then passed to a linear classification layer, which projects
into the label space.",
"developed_by": "Liu et al",
"contributed_by": "Dirk Groeneveld",
"date": "2020-07-29",
"version": "1",
"model_type": "RoBERTa",
"paper": "[RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://api.semanticscholar.org/CorpusID:198953378)",
"citation": "@article{Liu2019RoBERTaAR,
title={RoBERTa: A Robustly Optimized BERT Pretraining Approach},
author={Y. Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and M. Lewis and Luke Zettlemoyer and Veselin Stoyanov},
journal={ArXiv},
year={2019},
volume={abs/1907.11692}}",
"license": null,
"contact": "[email protected]",
"training_config": "snli_roberta.jsonnet",
},
"intended_use": {
"primary_uses": null,
"primary_users": null,
"out_of_scope_use_cases": null
},
"factors": {
"relevant_factors": null,
"evaluation_factors": null
},
"metrics": {
"model_performance_measures": "Accuracy",
"decision_thresholds": null,
"variation_approaches": null
},
"evaluation_data": {
"dataset": "[Multi-genre Natural Language Inference (MultiNLI)](https://cims.nyu.edu/~sbowman/multinli/) dev set",
"motivation": null,
"preprocessing": null
},
"training_data": {
"dataset": "[Multi-genre Natural Language Inference (MultiNLI)](https://cims.nyu.edu/~sbowman/multinli/) train set",
"motivation": null,
"preprocessing": null
},
"quantitative_analyses": {
"unitary_results": null,
"intersectional_results": null
},
"ethical_considerations": {
"ethical_considerations": null
},
"caveats_and_recommendations": {
"caveats_and_recommendations": null
}
}
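The description added above amounts to a three-step pipeline: encode the sentence pair with RoBERTa-large, pool the first-token representation (the `cls_pooler`), and project it into the label space with a linear layer. A rough schematic of that shape, written with Hugging Face `transformers` purely for illustration rather than with the actual AllenNLP modules:

```python
# Schematic only: the shape of the pipeline the card describes, not the
# AllenNLP implementation. The classifier head here is untrained.
import torch
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("roberta-large")
encoder = AutoModel.from_pretrained("roberta-large")
classifier = torch.nn.Linear(encoder.config.hidden_size, 3)  # three NLI labels

inputs = tokenizer(
    "The new rights are nice enough.",             # premise
    "Everyone really likes the newest benefits.",  # hypothesis
    return_tensors="pt",
)
with torch.no_grad():
    sequence = encoder(**inputs).last_hidden_state  # (1, seq_len, hidden): the embedded text field
    pooled = sequence[:, 0, :]                      # cls_pooler: take the first-token vector
    logits = classifier(pooled)                     # linear projection into the label space
```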
54 changes: 53 additions & 1 deletion allennlp_models/modelcards/pair-classification-roberta-snli.json
@@ -5,6 +5,58 @@
"display_name": "RoBERTa SNLI",
"archive_file": "snli-roberta-2020-07-29.tar.gz",
"model_details": {
"paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
"description": "This `Model` implements a basic text classifier. The text is embedded into a text field
using a RoBERTa-large model. The resulting sequence is pooled using a cls_pooler
`Seq2VecEncoder` and then passed to a linear classification layer, which projects
into the label space.",
"developed_by": "Liu et al",
"contributed_by": "Dirk Groeneveld",
"date": "2020-07-29",
"version": "1",
"model_type": "RoBERTa",
"paper": "[RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://api.semanticscholar.org/CorpusID:198953378)",
"citation": "@article{Liu2019RoBERTaAR,
title={RoBERTa: A Robustly Optimized BERT Pretraining Approach},
author={Y. Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and M. Lewis and Luke Zettlemoyer and Veselin Stoyanov},
journal={ArXiv},
year={2019},
volume={abs/1907.11692}}",
"license": null,
"contact": "[email protected]",
"training_config": "snli_roberta.jsonnet",
},
"intended_use": {
"primary_uses": null,
"primary_users": null,
"out_of_scope_use_cases": null
},
"factors": {
"relevant_factors": null,
"evaluation_factors": null
},
"metrics": {
"model_performance_measures": "Accuracy",
"decision_thresholds": null,
"variation_approaches": null
},
"evaluation_data": {
"dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set",
"motivation": null,
"preprocessing": null
},
"training_data": {
"dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set",
"motivation": null,
"preprocessing": null
},
"quantitative_analyses": {
"unitary_results": null,
"intersectional_results": null
},
"ethical_considerations": {
"ethical_considerations": null
},
"caveats_and_recommendations": {
"caveats_and_recommendations": null
}
}
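Since the SNLI and MNLI cards differ only in their datasets, one way to sanity-check the new fields is to read them back through the pretrained-models registry. A sketch under the assumption that `get_pretrained_models()` returns `ModelCard` objects whose attributes mirror the JSON keys above:

```python
# Sketch; assumes get_pretrained_models() exposes the cards as ModelCard
# objects with attributes matching the JSON keys (an assumption).
from allennlp_models.pretrained import get_pretrained_models

cards = get_pretrained_models()
snli_card = cards["pair-classification-roberta-snli"]
print(snli_card.display_name)               # "RoBERTa SNLI"
print(snli_card.model_details.description)  # the description added in this commit
print(snli_card.evaluation_data.dataset)    # SNLI dev set
```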
allennlp_models/pair_classification/models/decomposable_attention.py
@@ -17,8 +17,7 @@
class DecomposableAttention(Model):
"""
This `Model` implements the Decomposable Attention model described in [A Decomposable
Attention Model for Natural Language Inference](
https://www.semanticscholar.org/paper/A-Decomposable-Attention-Model-for-Natural-Languag-Parikh-T%C3%A4ckstr%C3%B6m/07a9478e87a8304fc3267fa16e83e9f3bbd98b27)
Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)
by Parikh et al., 2016, with some optional enhancements before the decomposable attention
actually happens. Parikh's original model allowed for computing an "intra-sentence" attention
before doing the decomposable entailment step. We generalize this to any
3 changes: 1 addition & 2 deletions allennlp_models/pair_classification/models/esim.py
@@ -22,8 +22,7 @@
class ESIM(Model):
"""
This `Model` implements the ESIM sequence model described in [Enhanced LSTM for Natural Language Inference]
(https://www.semanticscholar.org/paper/Enhanced-LSTM-for-Natural-Language-Inference-Chen-Zhu/83e7654d545fbbaaf2328df365a781fb67b841b4)
by Chen et al., 2017.
(https://api.semanticscholar.org/CorpusID:34032948) by Chen et al., 2017.
Registered as a `Model` with name "esim".
