keras-team · mattdangerw · Mar 8, 2023 · Mar 1, 2023 · Mar 1, 2023 · Mar 1, 2023
diff --git a/keras_nlp/models/albert/albert_presets.py b/keras_nlp/models/albert/albert_presets.py
@@ -24,6 +24,7 @@
             "params": 11683584,
             "official_name": "ALBERT",
             "path": "albert",
+            "model_card": "https://github.com/google-research/albert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30000,
@@ -53,6 +54,7 @@
             "params": 17683968,
             "official_name": "ALBERT",
             "path": "albert",
+            "model_card": "https://github.com/google-research/albert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30000,
@@ -82,6 +84,7 @@
             "params": 58724864,
             "official_name": "ALBERT",
             "path": "albert",
+            "model_card": "https://github.com/google-research/albert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30000,
@@ -111,6 +114,7 @@
             "params": 222595584,
             "official_name": "ALBERT",
             "path": "albert",
+            "model_card": "https://github.com/google-research/albert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30000,

diff --git a/keras_nlp/models/bart/bart_presets.py b/keras_nlp/models/bart/bart_presets.py
@@ -15,6 +15,16 @@
 
 backbone_presets = {
     "bart_base_en": {
+        "metadata": {
+            "description": (
+                "6-layer BART model where case is maintained. "
+                "Trained on BookCorpus, English Wikipedia and CommonCrawl."
+            ),
+            "params": 139417344,
+            "official_name": "BART",
+            "path": "bart",
+            "model_card": "https://github.com/facebookresearch/fairseq/blob/main/examples/bart/README.md",
+        },
         "config": {
             "vocabulary_size": 50265,
             "num_layers": 6,
@@ -25,15 +35,6 @@
             "max_sequence_length": 1024,
         },
         "preprocessor_config": {},
-        "metadata": {
-            "description": (
-                "6-layer BART model where case is maintained. "
-                "Trained on BookCorpus, English Wikipedia and CommonCrawl."
-            ),
-            "params": 139417344,
-            "official_name": "BART",
-            "path": "bart",
-        },
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/bart_base_en/v1/model.h5",
         "weights_hash": "5b59403f0cafafbd89680e0785791163",
         "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bart_base_en/v1/vocab.json",
@@ -42,6 +43,16 @@
         "merges_hash": "75a37753dd7a28a2c5df80c28bf06e4e",
     },
     "bart_large_en": {
+        "metadata": {
+            "description": (
+                "12-layer BART model where case is maintained. "
+                "Trained on BookCorpus, English Wikipedia and CommonCrawl."
+            ),
+            "params": 406287360,
+            "official_name": "BART",
+            "path": "bart",
+            "model_card": "https://github.com/facebookresearch/fairseq/blob/main/examples/bart/README.md",
+        },
         "config": {
             "vocabulary_size": 50265,
             "num_layers": 12,
@@ -52,15 +63,6 @@
             "max_sequence_length": 1024,
         },
         "preprocessor_config": {},
-        "metadata": {
-            "description": (
-                "12-layer BART model where case is maintained. "
-                "Trained on BookCorpus, English Wikipedia and CommonCrawl."
-            ),
-            "params": 406287360,
-            "official_name": "BART",
-            "path": "bart",
-        },
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/bart_large_en/v1/model.h5",
         "weights_hash": "6bfe7e591af8c5699ce6f9f18753af9a",
         "vocabulary_url": "https://storage.googleapis.com/keras-nlp/models/bart_large_en/v1/vocab.json",

diff --git a/keras_nlp/models/bert/bert_presets.py b/keras_nlp/models/bert/bert_presets.py
@@ -25,6 +25,7 @@
             "params": 4385920,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30522,
@@ -53,6 +54,7 @@
             "params": 28763648,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30522,
@@ -81,6 +83,7 @@
             "params": 41373184,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30522,
@@ -109,6 +112,7 @@
             "params": 109482240,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30522,
@@ -137,6 +141,7 @@
             "params": 108310272,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 28996,
@@ -164,6 +169,7 @@
             "params": 102267648,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 21128,
@@ -191,6 +197,7 @@
             "params": 177853440,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 119547,
@@ -219,6 +226,7 @@
             "params": 335141888,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 30522,
@@ -247,6 +255,7 @@
             "params": 333579264,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "vocabulary_size": 28996,
@@ -277,6 +286,7 @@
             "params": 4385920,
             "official_name": "BERT",
             "path": "bert",
+            "model_card": "https://github.com/google-research/bert/blob/master/README.md",
         },
         "config": {
             "backbone": {

diff --git a/keras_nlp/models/deberta_v3/deberta_v3_presets.py b/keras_nlp/models/deberta_v3/deberta_v3_presets.py
@@ -15,6 +15,16 @@
 
 backbone_presets = {
     "deberta_v3_extra_small_en": {
+        "metadata": {
+            "description": (
+                "12-layer DeBERTaV3 model where case is maintained. "
+                "Trained on English Wikipedia, BookCorpus and OpenWebText."
+            ),
+            "params": 70682112,
+            "official_name": "DeBERTaV3",
+            "path": "deberta_v3",
+            "model_card": "https://huggingface.co/microsoft/deberta-v3-xsmall",
+        },
         "config": {
             "vocabulary_size": 128100,
             "num_layers": 12,
@@ -26,21 +36,22 @@
             "bucket_size": 256,
         },
         "preprocessor_config": {},
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small_en/v1/model.h5",
+        "weights_hash": "d8e10327107e5c5e20b45548a5028619",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small_en/v1/vocab.spm",
+        "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
+    },
+    "deberta_v3_small_en": {
         "metadata": {
             "description": (
-                "12-layer DeBERTaV3 model where case is maintained. "
+                "6-layer DeBERTaV3 model where case is maintained. "
                 "Trained on English Wikipedia, BookCorpus and OpenWebText."
             ),
-            "params": 70682112,
+            "params": 141304320,
             "official_name": "DeBERTaV3",
             "path": "deberta_v3",
+            "model_card": "https://huggingface.co/microsoft/deberta-v3-small",
         },
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small_en/v1/model.h5",
-        "weights_hash": "d8e10327107e5c5e20b45548a5028619",
-        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_extra_small_en/v1/vocab.spm",
-        "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
-    },
-    "deberta_v3_small_en": {
         "config": {
             "vocabulary_size": 128100,
             "num_layers": 6,
@@ -52,21 +63,22 @@
             "bucket_size": 256,
         },
         "preprocessor_config": {},
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small_en/v1/model.h5",
+        "weights_hash": "84118eb7c5a735f2061ecccaf71bb888",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small_en/v1/vocab.spm",
+        "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
+    },
+    "deberta_v3_base_en": {
         "metadata": {
             "description": (
-                "6-layer DeBERTaV3 model where case is maintained. "
+                "12-layer DeBERTaV3 model where case is maintained. "
                 "Trained on English Wikipedia, BookCorpus and OpenWebText."
             ),
-            "params": 141304320,
+            "params": 183831552,
             "official_name": "DeBERTaV3",
             "path": "deberta_v3",
+            "model_card": "https://huggingface.co/microsoft/deberta-v3-base",
         },
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small_en/v1/model.h5",
-        "weights_hash": "84118eb7c5a735f2061ecccaf71bb888",
-        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_small_en/v1/vocab.spm",
-        "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
-    },
-    "deberta_v3_base_en": {
         "config": {
             "vocabulary_size": 128100,
             "num_layers": 12,
@@ -78,21 +90,22 @@
             "bucket_size": 256,
         },
         "preprocessor_config": {},
+        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_en/v1/model.h5",
+        "weights_hash": "cebce044aeed36aec9b94e3b8a255430",
+        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_en/v1/vocab.spm",
+        "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
+    },
+    "deberta_v3_large_en": {
         "metadata": {
             "description": (
-                "12-layer DeBERTaV3 model where case is maintained. "
+                "24-layer DeBERTaV3 model where case is maintained. "
                 "Trained on English Wikipedia, BookCorpus and OpenWebText."
             ),
-            "params": 183831552,
+            "params": 434012160,
             "official_name": "DeBERTaV3",
             "path": "deberta_v3",
+            "model_card": "https://huggingface.co/microsoft/deberta-v3-large",
         },
-        "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_en/v1/model.h5",
-        "weights_hash": "cebce044aeed36aec9b94e3b8a255430",
-        "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_en/v1/vocab.spm",
-        "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
-    },
-    "deberta_v3_large_en": {
         "config": {
             "vocabulary_size": 128100,
             "num_layers": 24,
@@ -104,21 +117,22 @@
             "bucket_size": 256,
         },
         "preprocessor_config": {},
-        "metadata": {
-            "description": (
-                "24-layer DeBERTaV3 model where case is maintained. "
-                "Trained on English Wikipedia, BookCorpus and OpenWebText."
-            ),
-            "params": 434012160,
-            "official_name": "DeBERTaV3",
-            "path": "deberta_v3",
-        },
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large_en/v1/model.h5",
         "weights_hash": "bce7690f358a9e39304f8c0ebc71a745",
         "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_large_en/v1/vocab.spm",
         "spm_proto_hash": "1613fcbf3b82999c187b09c9db79b568",
     },
     "deberta_v3_base_multi": {
+        "metadata": {
+            "description": (
+                "12-layer DeBERTaV3 model where case is maintained. "
+                "Trained on the 2.5TB multilingual CC100 dataset."
+            ),
+            "params": 278218752,
+            "official_name": "DeBERTaV3",
+            "path": "deberta_v3",
+            "model_card": "https://huggingface.co/microsoft/mdeberta-v3-base",
+        },
         "config": {
             "vocabulary_size": 251000,
             "num_layers": 12,
@@ -130,15 +144,6 @@
             "bucket_size": 256,
         },
         "preprocessor_config": {},
-        "metadata": {
-            "description": (
-                "12-layer DeBERTaV3 model where case is maintained. "
-                "Trained on the 2.5TB multilingual CC100 dataset."
-            ),
-            "params": 278218752,
-            "official_name": "DeBERTaV3",
-            "path": "deberta_v3",
-        },
         "weights_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_multi/v1/model.h5",
         "weights_hash": "26e5a824b26afd2ee336835bd337bbeb",
         "spm_proto_url": "https://storage.googleapis.com/keras-nlp/models/deberta_v3_base_multi/v1/vocab.spm",

diff --git a/keras_nlp/models/distil_bert/distil_bert_presets.py b/keras_nlp/models/distil_bert/distil_bert_presets.py
@@ -24,6 +24,7 @@
             "params": 66362880,
             "official_name": "DistilBERT",
             "path": "distil_bert",
+            "model_card": "https://huggingface.co/distilbert-base-uncased",
         },
         "config": {
             "vocabulary_size": 30522,
@@ -52,6 +53,7 @@
             "params": 65190912,
             "official_name": "DistilBERT",
             "path": "distil_bert",
+            "model_card": "https://huggingface.co/distilbert-base-cased",
         },
         "config": {
             "vocabulary_size": 28996,
@@ -78,6 +80,7 @@
             "params": 134734080,
             "official_name": "DistilBERT",
             "path": "distil_bert",
+            "model_card": "https://huggingface.co/distilbert-base-multilingual-cased",
         },
         "config": {
             "vocabulary_size": 119547,