diff --git a/CHANGELOG.md b/CHANGELOG.md
index dff44ae92..75acb7a3e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Changed
+
+- Added more information to model cards for pair classification models (`pair-classification-decomposable-attention-elmo`, `pair-classification-roberta-snli`, `pair-classification-roberta-mnli`, `pair-classification-esim`).
+
 ## [v1.2.0](https://github.com/allenai/allennlp-models/releases/tag/v1.2.0) - 2020-10-29
 
 ### Changed
diff --git a/allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json b/allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json
index bc6703d23..34d068df5 100644
--- a/allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json
+++ b/allennlp_models/modelcards/pair-classification-decomposable-attention-elmo.json
@@ -1,6 +1,72 @@
 {
     "id": "pair-classification-decomposable-attention-elmo",
     "registered_model_name": "decomposable_attention",
+    "registered_predictor_name": "textual_entailment",
     "display_name": "ELMo-based Decomposable Attention",
-    "archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz"
+    "archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz",
+    "model_details": {
+        "description": "This `Model` implements the Decomposable Attention model described in [A Decomposable
+        Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)
+        by Parikh et al., 2016, with some optional enhancements before the decomposable attention
+        actually happens. Parikh's original model allowed for computing an \"intra-sentence\" attention
+        before doing the decomposable entailment step. We generalize this to any
+        [`Seq2SeqEncoder`](../modules/seq2seq_encoders/seq2seq_encoder.md) that can be applied to
+        the premise and/or the hypothesis before computing entailment.
+
+        The basic outline of this model is to get an embedded representation of each word in the
+        premise and hypothesis, align words between the two, compare the aligned phrases, and make a
+        final entailment decision based on this aggregated comparison. Each step in this process uses
+        a feedforward network to modify the representation.
+
+        This model uses ELMo embeddings.",
+        "developed_by": "Parikh et al",
+        "contributed_by": "Dirk Groeneveld",
+        "date": "2020-04-09",
+        "version": "1",
+        "model_type": "Seq2Seq",
+        "paper": "[A Decomposable Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)",
+        "citation": "@article{Parikh2016ADA,
+        title={A Decomposable Attention Model for Natural Language Inference},
+        author={Ankur P. Parikh and Oscar T{\"a}ckstr{\"o}m and Dipanjan Das and Jakob Uszkoreit},
+        journal={ArXiv},
+        year={2016},
+        volume={abs/1606.01933}}",
+        "license": null,
+        "contact": "allennlp-contact@allenai.org",
+        "training_config": "decomposable_attention_elmo.jsonnet"
+    },
+    "intended_use": {
+        "primary_uses": null,
+        "primary_users": null,
+        "out_of_scope_use_cases": null
+    },
+    "factors": {
+        "relevant_factors": null,
+        "evaluation_factors": null
+    },
+    "metrics": {
+        "model_performance_measures": "Accuracy",
+        "decision_thresholds": null,
+        "variation_approaches": null
+    },
+    "evaluation_data": {
+        "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "training_data": {
+        "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "quantitative_analyses": {
+        "unitary_results": null,
+        "intersectional_results": null
+    },
+    "ethical_considerations": {
+        "ethical_considerations": null
+    },
+    "caveats_and_recommendations": {
+        "caveats_and_recommendations": null
+    }
 }
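The card's description walks through the attend / compare / aggregate outline of the model. For orientation, here is a minimal PyTorch sketch of that outline; the class, module names, and sizes are illustrative, not the registered `decomposable_attention` implementation:

```python
# Illustrative sketch of the attend / compare / aggregate steps described in
# the model card above. Names and dimensions are hypothetical.
import torch
import torch.nn.functional as F
from torch import nn


class DecomposableAttentionSketch(nn.Module):
    def __init__(self, dim: int, hidden: int, num_labels: int = 3):
        super().__init__()
        # Each step uses a small feedforward network, as the description says.
        self.attend = nn.Sequential(nn.Linear(dim, hidden), nn.ReLU())
        self.compare = nn.Sequential(nn.Linear(2 * dim, hidden), nn.ReLU())
        self.aggregate = nn.Linear(2 * hidden, num_labels)

    def forward(self, premise: torch.Tensor, hypothesis: torch.Tensor) -> torch.Tensor:
        # premise: (batch, p_len, dim); hypothesis: (batch, h_len, dim)
        # Attend: score every premise word against every hypothesis word.
        scores = torch.bmm(self.attend(premise), self.attend(hypothesis).transpose(1, 2))
        # Align: soft-align each sentence against the other.
        aligned_hyp = torch.bmm(F.softmax(scores, dim=2), hypothesis)
        aligned_prem = torch.bmm(F.softmax(scores, dim=1).transpose(1, 2), premise)
        # Compare: feedforward over each word paired with its aligned phrase.
        v_prem = self.compare(torch.cat([premise, aligned_hyp], dim=-1)).sum(dim=1)
        v_hyp = self.compare(torch.cat([hypothesis, aligned_prem], dim=-1)).sum(dim=1)
        # Aggregate the comparison vectors and make the entailment decision.
        return self.aggregate(torch.cat([v_prem, v_hyp], dim=-1))
```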
Parikh and Oscar T{\"a}ckstr{\"o}m and Dipanjan Das and Jakob Uszkoreit}, + journal={ArXiv}, + year={2016}, + volume={abs/1606.01933}}", + "license": null, + "contact": "allennlp-contact@allenai.org", + "training_config": "decomposable_attention_elmo.jsonnet", + }, + "intended_use": { + "primary_uses": null, + "primary_users": null, + "out_of_scope_use_cases": null + }, + "factors": { + "relevant_factors": null, + "evaluation_factors": null + }, + "metrics": { + "model_performance_measures": "Accuracy", + "decision_thresholds": null, + "variation_approaches": null + }, + "evaluation_data": { + "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set", + "motivation": null, + "preprocessing": null + }, + "training_data": { + "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set", + "motivation": null, + "preprocessing": null + }, + "quantitative_analyses": { + "unitary_results": null, + "intersectional_results": null + }, + "ethical_considerations": { + "ethical_considerations": null + }, + "caveats_and_recommendations": { + "caveats_and_recommendations": null + } } diff --git a/allennlp_models/modelcards/pair-classification-esim.json b/allennlp_models/modelcards/pair-classification-esim.json index 721e2be1a..2f89c6263 100644 --- a/allennlp_models/modelcards/pair-classification-esim.json +++ b/allennlp_models/modelcards/pair-classification-esim.json @@ -1,6 +1,59 @@ { "id": "pair-classification-esim", "registered_model_name": "esim", + "registered_predictor_name": "textual_entailment", "display_name": "Enhanced LSTM for Natural Language Inference", - "archive_file": "decomposable-attention-elmo-2020.04.09.tar.gz" + "archive_file": "esim-elmo-2020.02.10.tar.gz", + "model_details": { + "description": "This `Model` implements the ESIM model, which is a sequential neural inference model + based on chain LSTMs.", + "developed_by": "Chen et al", + "contributed_by": "Dirk Groeneveld", + "date": "2020-04-09", + "version": "1", + "model_type": "LSTM", + "paper": "[Enhanced LSTM for Natural Language Inference](https://api.semanticscholar.org/CorpusID:34032948)", + "citation": "@inproceedings{Chen2017EnhancedLF, + title={Enhanced LSTM for Natural Language Inference}, + author={Qian Chen and Xiao-Dan Zhu and Z. 
diff --git a/allennlp_models/modelcards/pair-classification-roberta-mnli.json b/allennlp_models/modelcards/pair-classification-roberta-mnli.json
index 8c237f40c..2c18992ca 100644
--- a/allennlp_models/modelcards/pair-classification-roberta-mnli.json
+++ b/allennlp_models/modelcards/pair-classification-roberta-mnli.json
@@ -5,6 +5,58 @@
     "display_name": "RoBERTa MNLI",
     "archive_file": "mnli-roberta-2020-07-29.tar.gz",
     "model_details": {
-        "paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
+        "description": "This `Model` implements a basic text classifier. The text is embedded into a text field
+        using a RoBERTa-large model. The resulting sequence is pooled using a cls_pooler
+        `Seq2VecEncoder` and then passed to a linear classification layer, which projects
+        into the label space.",
+        "developed_by": "Liu et al",
+        "contributed_by": "Dirk Groeneveld",
+        "date": "2020-07-29",
+        "version": "1",
+        "model_type": "RoBERTa",
+        "paper": "[RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://api.semanticscholar.org/CorpusID:198953378)",
+        "citation": "@article{Liu2019RoBERTaAR,
+        title={RoBERTa: A Robustly Optimized BERT Pretraining Approach},
+        author={Y. Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and M. Lewis and Luke Zettlemoyer and Veselin Stoyanov},
+        journal={ArXiv},
+        year={2019},
+        volume={abs/1907.11692}}",
+        "license": null,
+        "contact": "allennlp-contact@allenai.org",
+        "training_config": "snli_roberta.jsonnet"
+    },
+    "intended_use": {
+        "primary_uses": null,
+        "primary_users": null,
+        "out_of_scope_use_cases": null
+    },
+    "factors": {
+        "relevant_factors": null,
+        "evaluation_factors": null
+    },
+    "metrics": {
+        "model_performance_measures": "Accuracy",
+        "decision_thresholds": null,
+        "variation_approaches": null
+    },
+    "evaluation_data": {
+        "dataset": "[Multi-genre Natural Language Inference (MultiNLI)](https://cims.nyu.edu/~sbowman/multinli/) dev set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "training_data": {
+        "dataset": "[Multi-genre Natural Language Inference (MultiNLI)](https://cims.nyu.edu/~sbowman/multinli/) train set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "quantitative_analyses": {
+        "unitary_results": null,
+        "intersectional_results": null
+    },
+    "ethical_considerations": {
+        "ethical_considerations": null
+    },
+    "caveats_and_recommendations": {
+        "caveats_and_recommendations": null
     }
 }
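The description corresponds to a `basic_classifier` setup: RoBERTa-large embeddings, a `cls_pooler` `Seq2VecEncoder`, and a linear projection to the labels. An illustrative sketch of that fragment in config form; the authoritative settings live in the referenced training config, so treat the values here as assumptions:

```python
# Hypothetical config fragment matching the card's description; consult the
# actual jsonnet under training_config/pair_classification for real settings.
model_fragment = {
    "type": "basic_classifier",
    "text_field_embedder": {
        "token_embedders": {
            "tokens": {
                "type": "pretrained_transformer",
                "model_name": "roberta-large",
            }
        }
    },
    "seq2vec_encoder": {
        "type": "cls_pooler",
        "embedding_dim": 1024,  # hidden size of roberta-large
    },
}
```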
diff --git a/allennlp_models/modelcards/pair-classification-roberta-snli.json b/allennlp_models/modelcards/pair-classification-roberta-snli.json
index 52a452465..8c340c39b 100644
--- a/allennlp_models/modelcards/pair-classification-roberta-snli.json
+++ b/allennlp_models/modelcards/pair-classification-roberta-snli.json
@@ -5,6 +5,58 @@
     "display_name": "RoBERTa SNLI",
     "archive_file": "snli-roberta-2020-07-29.tar.gz",
     "model_details": {
-        "paper": "https://www.semanticscholar.org/paper/RoBERTa%3A-A-Robustly-Optimized-BERT-Pretraining-Liu-Ott/077f8329a7b6fa3b7c877a57b81eb6c18b5f87de#paper-header"
+        "description": "This `Model` implements a basic text classifier. The text is embedded into a text field
+        using a RoBERTa-large model. The resulting sequence is pooled using a cls_pooler
+        `Seq2VecEncoder` and then passed to a linear classification layer, which projects
+        into the label space.",
+        "developed_by": "Liu et al",
+        "contributed_by": "Dirk Groeneveld",
+        "date": "2020-07-29",
+        "version": "1",
+        "model_type": "RoBERTa",
+        "paper": "[RoBERTa: A Robustly Optimized BERT Pretraining Approach](https://api.semanticscholar.org/CorpusID:198953378)",
+        "citation": "@article{Liu2019RoBERTaAR,
+        title={RoBERTa: A Robustly Optimized BERT Pretraining Approach},
+        author={Y. Liu and Myle Ott and Naman Goyal and Jingfei Du and Mandar Joshi and Danqi Chen and Omer Levy and M. Lewis and Luke Zettlemoyer and Veselin Stoyanov},
+        journal={ArXiv},
+        year={2019},
+        volume={abs/1907.11692}}",
+        "license": null,
+        "contact": "allennlp-contact@allenai.org",
+        "training_config": "snli_roberta.jsonnet"
+    },
+    "intended_use": {
+        "primary_uses": null,
+        "primary_users": null,
+        "out_of_scope_use_cases": null
+    },
+    "factors": {
+        "relevant_factors": null,
+        "evaluation_factors": null
+    },
+    "metrics": {
+        "model_performance_measures": "Accuracy",
+        "decision_thresholds": null,
+        "variation_approaches": null
+    },
+    "evaluation_data": {
+        "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) dev set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "training_data": {
+        "dataset": "[Stanford Natural Language Inference (SNLI)](https://nlp.stanford.edu/projects/snli/) train set",
+        "motivation": null,
+        "preprocessing": null
+    },
+    "quantitative_analyses": {
+        "unitary_results": null,
+        "intersectional_results": null
+    },
+    "ethical_considerations": {
+        "ethical_considerations": null
+    },
+    "caveats_and_recommendations": {
+        "caveats_and_recommendations": null
     }
 }
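Because each card carries an `id`, models can also be looked up through the pretrained-model helpers rather than by raw archive URL. A sketch, assuming a version of `allennlp-models` that ships `load_predictor` (availability and output keys are assumptions to verify against your installed version):

```python
# Hypothetical lookup of a card by its `id` field, then one example prediction.
from allennlp_models.pretrained import load_predictor  # assumed available

predictor = load_predictor("pair-classification-roberta-snli")
result = predictor.predict(
    premise="A dog runs through the snow.",
    hypothesis="An animal is outside.",
)
print(result["label"])  # e.g. "entailment"; key names depend on the model's output dict
```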
diff --git a/allennlp_models/pair_classification/models/decomposable_attention.py b/allennlp_models/pair_classification/models/decomposable_attention.py
index 66445a978..beb391d80 100644
--- a/allennlp_models/pair_classification/models/decomposable_attention.py
+++ b/allennlp_models/pair_classification/models/decomposable_attention.py
@@ -17,8 +17,7 @@ class DecomposableAttention(Model):
     """
     This `Model` implements the Decomposable Attention model described in [A Decomposable
-    Attention Model for Natural Language Inference](
-    https://www.semanticscholar.org/paper/A-Decomposable-Attention-Model-for-Natural-Languag-Parikh-T%C3%A4ckstr%C3%B6m/07a9478e87a8304fc3267fa16e83e9f3bbd98b27)
+    Attention Model for Natural Language Inference](https://api.semanticscholar.org/CorpusID:8495258)
     by Parikh et al., 2016, with some optional enhancements before the decomposable attention
     actually happens. Parikh's original model allowed for computing an "intra-sentence" attention
     before doing the decomposable entailment step. We generalize this to any
     [`Seq2SeqEncoder`](../modules/seq2seq_encoders/seq2seq_encoder.md) that can be applied to
diff --git a/allennlp_models/pair_classification/models/esim.py b/allennlp_models/pair_classification/models/esim.py
index d76ebdf7f..c2371c171 100644
--- a/allennlp_models/pair_classification/models/esim.py
+++ b/allennlp_models/pair_classification/models/esim.py
@@ -22,8 +22,7 @@ class ESIM(Model):
     """
     This `Model` implements the ESIM sequence model described in [Enhanced LSTM for Natural
     Language Inference]
-    (https://www.semanticscholar.org/paper/Enhanced-LSTM-for-Natural-Language-Inference-Chen-Zhu/83e7654d545fbbaaf2328df365a781fb67b841b4)
-    by Chen et al., 2017.
+    (https://api.semanticscholar.org/CorpusID:34032948) by Chen et al., 2017.
 
     Registered as a `Model` with name "esim".
 