From 78a25cc35742ab092adff8659ae60f5e3755489f Mon Sep 17 00:00:00 2001 From: abheesht17 Date: Tue, 17 Jan 2023 19:09:04 +0530 Subject: [PATCH 1/5] Add Base Task Class --- keras_nlp/models/bert/bert_classifier.py | 176 +++--------------- .../deberta_v3/deberta_v3_classifier.py | 158 +++------------- .../distil_bert/distil_bert_classifier.py | 160 +++------------- .../models/roberta/roberta_classifier.py | 156 +++------------- keras_nlp/models/task.py | 131 +++++++++++++ .../xlm_roberta/xlm_roberta_classifier.py | 154 +++------------ 6 files changed, 283 insertions(+), 652 deletions(-) create mode 100644 keras_nlp/models/task.py diff --git a/keras_nlp/models/bert/bert_classifier.py b/keras_nlp/models/bert/bert_classifier.py index 187578434f..5027d0971b 100644 --- a/keras_nlp/models/bert/bert_classifier.py +++ b/keras_nlp/models/bert/bert_classifier.py @@ -14,7 +14,6 @@ """BERT classification model.""" import copy -import os from tensorflow import keras @@ -23,7 +22,7 @@ from keras_nlp.models.bert.bert_preprocessor import BertPreprocessor from keras_nlp.models.bert.bert_presets import backbone_presets from keras_nlp.models.bert.bert_presets import classifier_presets -from keras_nlp.utils.pipeline_model import PipelineModel +from keras_nlp.models.task import Task from keras_nlp.utils.python_utils import classproperty from keras_nlp.utils.python_utils import format_docstring @@ -31,7 +30,7 @@ @keras.utils.register_keras_serializable(package="keras_nlp") -class BertClassifier(PipelineModel): +class BertClassifier(Task): """An end-to-end BERT model for classification tasks This model attaches a classification head to a `keras_nlp.model.BertBackbone` @@ -122,166 +121,49 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone - self._preprocessor = preprocessor self.num_classes = num_classes - - def preprocess_samples(self, x, y=None, sample_weight=None): - return self.preprocessor(x, y=y, sample_weight=sample_weight) - - @property - def backbone(self): - """A `keras_nlp.models.BertBackbone` instance providing the encoder - submodel. - """ - return self._backbone - - @property - def preprocessor(self): - """A `keras_nlp.models.BertPreprocessor` for preprocessing inputs.""" - return self._preprocessor + self.dropout = dropout + self._preprocessor = preprocessor def get_config(self): - return { - "backbone": keras.layers.serialize(self.backbone), - "preprocessor": keras.layers.serialize(self.preprocessor), - "num_classes": self.num_classes, - "name": self.name, - "trainable": self.trainable, - } + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "dropout": self.dropout, + "name": self.name, + "trainable": self.trainable, + } + ) + return config - @classmethod - def from_config(cls, config): - if "backbone" in config and isinstance(config["backbone"], dict): - config["backbone"] = keras.layers.deserialize(config["backbone"]) - if "preprocessor" in config and isinstance( - config["preprocessor"], dict - ): - config["preprocessor"] = keras.layers.deserialize( - config["preprocessor"] - ) - return cls(**config) + @classproperty + def backbone_cls(cls): + return BertBackbone + + @classproperty + def preprocessor_cls(cls): + return BertPreprocessor @classproperty def presets(cls): return copy.deepcopy({**backbone_presets, **classifier_presets}) @classmethod - @format_docstring(names=PRESET_NAMES) def from_preset( cls, preset, load_weights=True, **kwargs, ): - """Create a classification model from a preset architecture and weights. 
- - By default, this method will automatically create a `preprocessor` - layer to preprocess raw inputs during `fit()`, `predict()`, and - `evaluate()`. If you would like to disable this behavior, pass - `preprocessor=None`. - - Args: - preset: string. Must be one of {{names}}. - load_weights: Whether to load pre-trained weights into model. - Defaults to `True`. - - Examples: - - Raw string inputs. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Create a BertClassifier and fit your data. - classifier = keras_nlp.models.BertClassifier.from_preset( - "bert_base_en_uncased", - num_classes=4, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + return super().from_preset( + preset=preset, load_weights=load_weights, **kwargs ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - Raw string inputs with customized preprocessing. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Use a shorter sequence length. - preprocessor = keras_nlp.models.BertPreprocessor.from_preset( - "bert_base_en_uncased", - sequence_length=128, - ) - - # Create a BertClassifier and fit your data. - classifier = keras_nlp.models.BertClassifier.from_preset( - "bert_base_en_uncased", - num_classes=4, - preprocessor=preprocessor, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Preprocessed inputs. - ```python - # Create a dataset with preprocessed features in an `(x, y)` format. - preprocessed_features = { - "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), - "segment_ids": tf.constant( - [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - } - labels = [0, 3] - - # Create a BERT classifier and fit your data. - classifier = keras_nlp.models.BertClassifier.from_preset( - "bert_base_en_uncased", - num_classes=4, - preprocessor=None, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=preprocessed_features, y=labels, batch_size=2) - ``` - """ - if preset not in cls.presets: - raise ValueError( - "`preset` must be one of " - f"""{", ".join(cls.presets)}. 
Received: {preset}.""" - ) - - if "preprocessor" not in kwargs: - kwargs["preprocessor"] = BertPreprocessor.from_preset(preset) - - # Check if preset is backbone-only model - if preset in BertBackbone.presets: - backbone = BertBackbone.from_preset(preset, load_weights) - return cls(backbone, **kwargs) - - # Otherwise must be one of class presets - metadata = cls.presets[preset] - config = metadata["config"] - model = cls.from_config({**config, **kwargs}) - - if not load_weights: - return model - - weights = keras.utils.get_file( - "model.h5", - metadata["weights_url"], - cache_subdir=os.path.join("models", preset), - file_hash=metadata["weights_hash"], - ) - model.load_weights(weights) - return model +BertClassifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ +format_docstring( + model_task_name=BertClassifier.__name__, + example_preset_name="bert_base_en_uncased", + preset_names=PRESET_NAMES, +)(BertClassifier.from_preset.__func__) diff --git a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py index 3eee700633..acb207bca8 100644 --- a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py +++ b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py @@ -25,13 +25,15 @@ DebertaV3Preprocessor, ) from keras_nlp.models.deberta_v3.deberta_v3_presets import backbone_presets -from keras_nlp.utils.pipeline_model import PipelineModel +from keras_nlp.models.task import Task from keras_nlp.utils.python_utils import classproperty from keras_nlp.utils.python_utils import format_docstring +PRESET_NAMES = ", ".join(list(backbone_presets)) + @keras.utils.register_keras_serializable(package="keras_nlp") -class DebertaV3Classifier(PipelineModel): +class DebertaV3Classifier(Task): """An end-to-end DeBERTa model for classification tasks. 
This model attaches a classification head to a @@ -128,149 +130,51 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone - self._preprocessor = preprocessor self.num_classes = num_classes self.hidden_dim = hidden_dim self.dropout = dropout - - def preprocess_samples(self, x, y=None, sample_weight=None): - return self.preprocessor(x, y=y, sample_weight=sample_weight) - - @property - def backbone(self): - """A `keras_nlp.models.DebertaV3Backbone` submodel.""" - return self._backbone - - @property - def preprocessor(self): - """A `keras_nlp.models.DebertaV3Preprocessor` preprocessing layer.""" - return self._preprocessor + self._preprocessor = preprocessor def get_config(self): - return { - "backbone": keras.layers.serialize(self.backbone), - "preprocessor": keras.layers.serialize(self.preprocessor), - "num_classes": self.num_classes, - "hidden_dim": self.hidden_dim, - "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, - } + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "hidden_dim": self.hidden_dim, + "dropout": self.dropout, + "name": self.name, + "trainable": self.trainable, + } + ) + return config - @classmethod - def from_config(cls, config): - if "backbone" in config and isinstance(config["backbone"], dict): - config["backbone"] = keras.layers.deserialize(config["backbone"]) - if "preprocessor" in config and isinstance( - config["preprocessor"], dict - ): - config["preprocessor"] = keras.layers.deserialize( - config["preprocessor"] - ) - return cls(**config) + @classproperty + def backbone_cls(cls): + return DebertaV3Backbone + + @classproperty + def preprocessor_cls(cls): + return DebertaV3Preprocessor @classproperty def presets(cls): return copy.deepcopy(backbone_presets) @classmethod - @format_docstring(names=", ".join(backbone_presets)) def from_preset( cls, preset, load_weights=True, **kwargs, ): - """Create a classification model from a preset architecture and weights. - - By default, this method will automatically create a `preprocessor` - layer to preprocess raw inputs during `fit()`, `predict()`, and - `evaluate()`. If you would like to disable this behavior, pass - `preprocessor=None`. - - Args: - preset: string. Must be one of {{names}}. - load_weights: Whether to load pre-trained weights into model. - Defaults to `True`. - - Examples: - - Raw string inputs. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Create a DebertaV3Classifier and fit your data. - classifier = keras_nlp.models.DebertaV3Classifier.from_preset( - "deberta_v3_base_en", - num_classes=4, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Raw string inputs with customized preprocessing. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Use a shorter sequence length. - preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset( - "deberta_v3_base_en", - sequence_length=128, - ) - - # Create a DebertaV3Classifier and fit your data. 
- classifier = keras_nlp.models.DebertaV3Classifier.from_preset( - "deberta_v3_base_en", - num_classes=4, - preprocessor=preprocessor, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Preprocessed inputs. - ```python - # Create a dataset with preprocessed features in an `(x, y)` format. - preprocessed_features = { - "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - } - labels = [0, 3] - - # Create a DebertaV3Classifier and fit your data. - classifier = keras_nlp.models.DebertaV3Classifier.from_preset( - "deberta_v3_base_en", - num_classes=4, - preprocessor=None, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + return super().from_preset( + preset=preset, load_weights=load_weights, **kwargs ) - classifier.fit(x=preprocessed_features, y=labels, batch_size=2) - ``` - """ - if "preprocessor" not in kwargs: - kwargs["preprocessor"] = DebertaV3Preprocessor.from_preset(preset) - # Check if preset is backbone-only model - if preset in DebertaV3Backbone.presets: - backbone = DebertaV3Backbone.from_preset(preset, load_weights) - return cls(backbone, **kwargs) - # Otherwise must be one of class presets - # Currently no classifier-level presets, so must throw. - if preset not in cls.presets: - raise ValueError( - "`preset` must be one of " - f"""{", ".join(cls.presets)}. Received: {preset}.""" - ) +DebertaV3Classifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ +format_docstring( + model_task_name=DebertaV3Classifier.__name__, + example_preset_name="deberta_v3_base_en", + preset_names=PRESET_NAMES, +)(DebertaV3Classifier.from_preset.__func__) diff --git a/keras_nlp/models/distil_bert/distil_bert_classifier.py b/keras_nlp/models/distil_bert/distil_bert_classifier.py index fb9a945cea..9fdf559385 100644 --- a/keras_nlp/models/distil_bert/distil_bert_classifier.py +++ b/keras_nlp/models/distil_bert/distil_bert_classifier.py @@ -25,13 +25,15 @@ DistilBertPreprocessor, ) from keras_nlp.models.distil_bert.distil_bert_presets import backbone_presets -from keras_nlp.utils.pipeline_model import PipelineModel +from keras_nlp.models.task import Task from keras_nlp.utils.python_utils import classproperty from keras_nlp.utils.python_utils import format_docstring +PRESET_NAMES = ", ".join(list(backbone_presets)) + @keras.utils.register_keras_serializable(package="keras_nlp") -class DistilBertClassifier(PipelineModel): +class DistilBertClassifier(Task): """An end-to-end DistilBERT model for classification tasks. 
This model attaches a classification head to a @@ -127,151 +129,51 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone - self._preprocessor = preprocessor self.num_classes = num_classes self.hidden_dim = hidden_dim self.dropout = dropout - - def preprocess_samples(self, x, y=None, sample_weight=None): - return self.preprocessor(x, y=y, sample_weight=sample_weight) - - @property - def backbone(self): - """A `keras_nlp.models.DistilBertBackbone` submodel.""" - return self._backbone - - @property - def preprocessor(self): - """A `keras_nlp.models.DistilBertPreprocessor` preprocessing layer.""" - return self._preprocessor + self._preprocessor = preprocessor def get_config(self): - return { - "backbone": keras.layers.serialize(self.backbone), - "preprocessor": keras.layers.serialize(self.preprocessor), - "num_classes": self.num_classes, - "hidden_dim": self.hidden_dim, - "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, - } + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "hidden_dim": self.hidden_dim, + "dropout": self.dropout, + "name": self.name, + "trainable": self.trainable, + } + ) + return config - @classmethod - def from_config(cls, config): - if "backbone" in config and isinstance(config["backbone"], dict): - config["backbone"] = keras.layers.deserialize(config["backbone"]) - if "preprocessor" in config and isinstance( - config["preprocessor"], dict - ): - config["preprocessor"] = keras.layers.deserialize( - config["preprocessor"] - ) - return cls(**config) + @classproperty + def backbone_cls(cls): + return DistilBertBackbone + + @classproperty + def preprocessor_cls(cls): + return DistilBertPreprocessor @classproperty def presets(cls): return copy.deepcopy(backbone_presets) @classmethod - @format_docstring(names=", ".join(backbone_presets)) def from_preset( cls, preset, load_weights=True, **kwargs, ): - """Create a classification model from a preset architecture and weights. - - By default, this method will automatically create a `preprocessor` - layer to preprocess raw inputs during `fit()`, `predict()`, and - `evaluate()`. If you would like to disable this behavior, pass - `preprocessor=None`. - - Args: - preset: string. Must be one of {{names}}. - load_weights: Whether to load pre-trained weights into model. - Defaults to `True`. - - Examples: - - Raw string inputs. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Create a DistilBertClassifier and fit your data. - classifier = keras_nlp.models.DistilBertClassifier.from_preset( - "distil_bert_base_en_uncased", - num_classes=4, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Raw string inputs with customized preprocessing. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Use a shorter sequence length. - preprocessor = keras_nlp.models.DistilBertBackbone.from_preset( - "distil_bert_base_en_uncased", - sequence_length=128, - ) - # Create a DistilBertClassifier and fit your data. 
- classifier = keras_nlp.models.DistilBertClassifier.from_preset( - "distil_bert_base_en_uncased", - num_classes=4, - preprocessor=preprocessor, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Preprocessed inputs. - ```python - # Create a dataset with preprocessed features in an `(x, y)` format. - preprocessed_features = { - "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), - "segment_ids": tf.constant( - [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - } - labels = [0, 3] - - # Create a DistilBERT classifier and fit your data. - classifier = keras_nlp.models.DistilBertClassifier.from_preset( - "distil_bert_base_en_uncased", - num_classes=4, - preprocessor=None, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + return super().from_preset( + preset=preset, load_weights=load_weights, **kwargs ) - classifier.fit(x=preprocessed_features, y=labels, batch_size=2) - ``` - """ - if "preprocessor" not in kwargs: - kwargs["preprocessor"] = DistilBertPreprocessor.from_preset(preset) - # Check if preset is backbone-only model - if preset in DistilBertBackbone.presets: - backbone = DistilBertBackbone.from_preset(preset, load_weights) - return cls(backbone, **kwargs) - # Otherwise must be one of class presets - # Currently no classifier-level presets, so must throw. - if preset not in cls.presets: - raise ValueError( - "`preset` must be one of " - f"""{", ".join(cls.presets)}. Received: {preset}.""" - ) +DistilBertClassifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ +format_docstring( + model_task_name=DistilBertClassifier.__name__, + example_preset_name="distil_bert_base_en_uncased", + preset_names=PRESET_NAMES, +)(DistilBertClassifier.from_preset.__func__) diff --git a/keras_nlp/models/roberta/roberta_classifier.py b/keras_nlp/models/roberta/roberta_classifier.py index 0cbb79fdeb..86fb45fe5e 100644 --- a/keras_nlp/models/roberta/roberta_classifier.py +++ b/keras_nlp/models/roberta/roberta_classifier.py @@ -21,13 +21,15 @@ from keras_nlp.models.roberta.roberta_backbone import roberta_kernel_initializer from keras_nlp.models.roberta.roberta_preprocessor import RobertaPreprocessor from keras_nlp.models.roberta.roberta_presets import backbone_presets -from keras_nlp.utils.pipeline_model import PipelineModel +from keras_nlp.models.task import Task from keras_nlp.utils.python_utils import classproperty from keras_nlp.utils.python_utils import format_docstring +PRESET_NAMES = ", ".join(list(backbone_presets)) + @keras.utils.register_keras_serializable(package="keras_nlp") -class RobertaClassifier(PipelineModel): +class RobertaClassifier(Task): """An end-to-end RoBERTa model for classification tasks. 
This model attaches a classification head to a @@ -129,144 +131,46 @@ def __init__( self.dropout = dropout self._preprocessor = preprocessor - def preprocess_samples(self, x, y=None, sample_weight=None): - return self.preprocessor(x, y=y, sample_weight=sample_weight) - - @property - def backbone(self): - """The associated `keras_nlp.models.RobertaBackbone`.""" - return self._backbone - - @property - def preprocessor(self): - """A `keras_nlp.models.RobertaPreprocessor` for preprocessing inputs.""" - return self._preprocessor - def get_config(self): - return { - "backbone": keras.layers.serialize(self.backbone), - "preprocessor": keras.layers.serialize(self.preprocessor), - "num_classes": self.num_classes, - "hidden_dim": self.hidden_dim, - "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, - } + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "hidden_dim": self.hidden_dim, + "dropout": self.dropout, + "name": self.name, + "trainable": self.trainable, + } + ) + return config - @classmethod - def from_config(cls, config): - if "backbone" in config and isinstance(config["backbone"], dict): - config["backbone"] = keras.layers.deserialize(config["backbone"]) - if "preprocessor" in config and isinstance( - config["preprocessor"], dict - ): - config["preprocessor"] = keras.layers.deserialize( - config["preprocessor"] - ) - return cls(**config) + @classproperty + def backbone_cls(cls): + return RobertaBackbone + + @classproperty + def preprocessor_cls(cls): + return RobertaPreprocessor @classproperty def presets(cls): return copy.deepcopy(backbone_presets) @classmethod - @format_docstring(names=", ".join(backbone_presets)) def from_preset( cls, preset, load_weights=True, **kwargs, ): - """Create a classification model from a preset architecture and weights. - - By default, this method will automatically create a `preprocessor` - layer to preprocess raw inputs during `fit()`, `predict()`, and - `evaluate()`. If you would like to disable this behavior, pass - `preprocessor=None`. - - Args: - preset: string. Must be one of {{names}}. - load_weights: Whether to load pre-trained weights into model. - Defaults to `True`. - - Examples: - - Raw string inputs. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Create a RobertClassifier and fit your data. - classifier = keras_nlp.models.RobertClassifier.from_preset( - "roberta_base_en", - num_classes=4, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Raw string inputs with customized preprocessing. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Use a shorter sequence length. - preprocessor = keras_nlp.models.RobertPreprocessor.from_preset( - "roberta_base_en", - sequence_length=128, - ) - - # Create a RobertClassifier and fit your data. - classifier = keras_nlp.models.RobertClassifier.from_preset( - "roberta_base_en", - num_classes=4, - preprocessor=preprocessor, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Preprocessed inputs. - ```python - # Create a dataset with preprocessed features in an `(x, y)` format. 
- preprocessed_features = { - "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - } - labels = [0, 3] - - # Create a RoBERTa classifier and fit your data. - classifier = keras_nlp.models.RobertClassifier.from_preset( - "roberta_base_en", - num_classes=4, - preprocessor=None, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + return super().from_preset( + preset=preset, load_weights=load_weights, **kwargs ) - classifier.fit(x=preprocessed_features, y=labels, batch_size=2) - ``` - """ - if "preprocessor" not in kwargs: - kwargs["preprocessor"] = RobertaPreprocessor.from_preset(preset) - # Check if preset is backbone-only model - if preset in RobertaBackbone.presets: - backbone = RobertaBackbone.from_preset(preset, load_weights) - return cls(backbone, **kwargs) - # Otherwise must be one of class presets. - # Currently no classifier-level presets, so we raise ValueError. - if preset not in cls.presets: - raise ValueError( - "`preset` must be one of " - f"""{", ".join(cls.presets)}. Received: {preset}.""" - ) +RobertaClassifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ +format_docstring( + model_task_name=RobertaClassifier.__name__, + example_preset_name="roberta_base_en", + preset_names=PRESET_NAMES, +)(RobertaClassifier.from_preset.__func__) diff --git a/keras_nlp/models/task.py b/keras_nlp/models/task.py new file mode 100644 index 0000000000..b50e7898ca --- /dev/null +++ b/keras_nlp/models/task.py @@ -0,0 +1,131 @@ +# Copyright 2022 The KerasNLP Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Base class for Task models.""" + +import os + +from tensorflow import keras + +from keras_nlp.utils.pipeline_model import PipelineModel +from keras_nlp.utils.python_utils import classproperty + + +@keras.utils.register_keras_serializable(package="keras_nlp") +class Task(PipelineModel): + """Base class for Task models.""" + + def preprocess_samples(self, x, y=None, sample_weight=None): + return self.preprocessor(x, y=y, sample_weight=sample_weight) + + @property + def backbone(self): + """A `keras_nlp.models.backbone.Backbone` instance providing the encoder submodel.""" + return self._backbone + + @property + def preprocessor(self): + """A `keras_nlp.models.preprocessor.Preprocessor` instance for preprocessing inputs.""" + return self._preprocessor + + def get_config(self): + return { + "backbone": keras.layers.serialize(self.backbone), + "preprocessor": keras.layers.serialize(self.preprocessor), + } + + @classmethod + def from_config(cls, config): + if "backbone" in config and isinstance(config["backbone"], dict): + config["backbone"] = keras.layers.deserialize(config["backbone"]) + if "preprocessor" in config and isinstance( + config["preprocessor"], dict + ): + config["preprocessor"] = keras.layers.deserialize( + config["preprocessor"] + ) + return cls(**config) + + @classproperty + def backbone_cls(cls): + return None + + @classproperty + def preprocessor_cls(cls): + return None + + @classproperty + def presets(cls): + return {} + + def from_preset( + cls, + preset, + load_weights=True, + **kwargs, + ): + """Instantiate {{model_task_name}} model from preset architecture and weights. + + Args: + preset: string. Must be one of "{{preset_names}}". + load_weights: Whether to load pre-trained weights into model. + Defaults to `True`. + + Examples: + ```python + # Load architecture and weights from preset + model = {{model_task_name}}.from_preset("{{example_preset_name}}") + + # Load randomly initialized model from preset architecture + model = {{model_task_name}}.from_preset( + "{{example_preset_name}}", + load_weights=False + ) + ``` + """ + if not cls.presets: + raise NotImplementedError( + "No presets have been created for this class." + ) + + if preset not in cls.presets: + raise ValueError( + "`preset` must be one of " + f"""{", ".join(cls.presets)}. 
Received: {preset}.""" + ) + + if "preprocessor" not in kwargs: + kwargs["preprocessor"] = cls.preprocessor_cls.from_preset(preset) + + # Check if preset is backbone-only model + if preset in cls.backbone_cls.presets: + backbone = cls.backbone_cls.from_preset(preset, load_weights) + return cls(backbone, **kwargs) + + # Otherwise must be one of class presets + metadata = cls.presets[preset] + config = metadata["config"] + model = cls.from_config({**config, **kwargs}) + + if not load_weights: + return model + + weights = keras.utils.get_file( + "model.h5", + metadata["weights_url"], + cache_subdir=os.path.join("models", preset), + file_hash=metadata["weights_hash"], + ) + + model.load_weights(weights) + return model diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py index 7482666f4a..5852d32e8a 100644 --- a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py +++ b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py @@ -18,18 +18,20 @@ from tensorflow import keras from keras_nlp.models.roberta.roberta_backbone import roberta_kernel_initializer +from keras_nlp.models.task import Task from keras_nlp.models.xlm_roberta.xlm_roberta_backbone import XLMRobertaBackbone from keras_nlp.models.xlm_roberta.xlm_roberta_preprocessor import ( XLMRobertaPreprocessor, ) from keras_nlp.models.xlm_roberta.xlm_roberta_presets import backbone_presets -from keras_nlp.utils.pipeline_model import PipelineModel from keras_nlp.utils.python_utils import classproperty from keras_nlp.utils.python_utils import format_docstring +PRESET_NAMES = ", ".join(list(backbone_presets)) + @keras.utils.register_keras_serializable(package="keras_nlp") -class XLMRobertaClassifier(PipelineModel): +class XLMRobertaClassifier(Task): """An end-to-end XLM-RoBERTa model for classification tasks. 
This model attaches a classification head to a @@ -123,46 +125,34 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone - self._preprocessor = preprocessor self.num_classes = num_classes self.hidden_dim = hidden_dim self.dropout = dropout + self._preprocessor = preprocessor def preprocess_samples(self, x, y=None, sample_weight=None): return self.preprocessor(x, y=y, sample_weight=sample_weight) - @property - def backbone(self): - """A `keras_nlp.models.XLMRobertaBackbone` submodel.""" - return self._backbone - - @property - def preprocessor(self): - """A `keras_nlp.models.XLMRobertaPreprocessor` preprocessing layer.""" - return self._preprocessor - def get_config(self): - return { - "backbone": keras.layers.serialize(self.backbone), - "preprocessor": keras.layers.serialize(self.preprocessor), - "num_classes": self.num_classes, - "hidden_dim": self.hidden_dim, - "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, - } + config = super().get_config() + config.update( + { + "num_classes": self.num_classes, + "hidden_dim": self.hidden_dim, + "dropout": self.dropout, + "name": self.name, + "trainable": self.trainable, + } + ) + return config - @classmethod - def from_config(cls, config): - if "backbone" in config and isinstance(config["backbone"], dict): - config["backbone"] = keras.layers.deserialize(config["backbone"]) - if "preprocessor" in config and isinstance( - config["preprocessor"], dict - ): - config["preprocessor"] = keras.layers.deserialize( - config["preprocessor"] - ) - return cls(**config) + @classproperty + def backbone_cls(cls): + return XLMRobertaBackbone + + @classproperty + def preprocessor_cls(cls): + return XLMRobertaPreprocessor @classproperty def presets(cls): @@ -176,96 +166,14 @@ def from_preset( load_weights=True, **kwargs, ): - """Create a classification model from a preset architecture and weights. - - By default, this method will automatically create a `preprocessor` - layer to preprocess raw inputs during `fit()`, `predict()`, and - `evaluate()`. If you would like to disable this behavior, pass - `preprocessor=None`. - - Args: - preset: string. Must be one of {{names}}. - load_weights: Whether to load pre-trained weights into model. - Defaults to `True`. - - Examples: - - Raw string inputs. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Create a XLMRobertaClassifier and fit your data. - classifier = keras_nlp.models.XLMRobertaClassifier.from_preset( - "xlm_roberta_base_multi", - num_classes=4, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Raw string inputs with customized preprocessing. - ```python - # Create a dataset with raw string features in an `(x, y)` format. - features = ["The quick brown fox jumped.", "I forgot my homework."] - labels = [0, 3] - - # Use a shorter sequence length. - preprocessor = keras_nlp.models.XLMRobertaPreprocessor.from_preset( - "xlm_roberta_base_multi", - sequence_length=128, - ) - - # Create a XLMRobertaClassifier and fit your data. 
- classifier = keras_nlp.models.XLMRobertaClassifier.from_preset( - "xlm_roberta_base_multi", - num_classes=4, - preprocessor=preprocessor, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), - ) - classifier.fit(x=features, y=labels, batch_size=2) - ``` - - Preprocessed inputs. - ```python - # Create a dataset with preprocessed features in an `(x, y)` format. - preprocessed_features = { - "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), - "padding_mask": tf.constant( - [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) - ), - } - labels = [0, 3] - - # Create a XLMRobertaClassifier and fit your data. - classifier = keras_nlp.models.XLMRobertaClassifier.from_preset( - "xlm_roberta_base_multi", - num_classes=4, - preprocessor=None, - ) - classifier.compile( - loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + return super().from_preset( + preset=preset, load_weights=load_weights, **kwargs ) - classifier.fit(x=preprocessed_features, y=labels, batch_size=2) - ``` - """ - if "preprocessor" not in kwargs: - kwargs["preprocessor"] = XLMRobertaPreprocessor.from_preset(preset) - # Check if preset is backbone-only model - if preset in XLMRobertaBackbone.presets: - backbone = XLMRobertaBackbone.from_preset(preset, load_weights) - return cls(backbone, **kwargs) - # Otherwise must be one of class presets - # Currently no classifier-level presets, so must throw. - if preset not in cls.presets: - raise ValueError( - "`preset` must be one of " - f"""{", ".join(cls.presets)}. Received: {preset}.""" - ) +XLMRobertaClassifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ +format_docstring( + model_task_name=XLMRobertaClassifier.__name__, + example_preset_name="xlm_roberta_base_multi", + preset_names=PRESET_NAMES, +)(XLMRobertaClassifier.from_preset.__func__) From d2e9de5a82f183dac38cdcdb3181be6563b7738d Mon Sep 17 00:00:00 2001 From: abheesht17 Date: Tue, 17 Jan 2023 19:14:24 +0530 Subject: [PATCH 2/5] Minor fix --- keras_nlp/models/task.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras_nlp/models/task.py b/keras_nlp/models/task.py index b50e7898ca..89465bf0fc 100644 --- a/keras_nlp/models/task.py +++ b/keras_nlp/models/task.py @@ -68,6 +68,7 @@ def preprocessor_cls(cls): def presets(cls): return {} + @classmethod def from_preset( cls, preset, From 6db9d6d3cd8d79c8f30ef20b651280dd16a0bb8c Mon Sep 17 00:00:00 2001 From: abheesht17 Date: Tue, 17 Jan 2023 19:23:28 +0530 Subject: [PATCH 3/5] Fixes --- keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py | 1 - 1 file changed, 1 deletion(-) diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py index 5852d32e8a..7c385d910d 100644 --- a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py +++ b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py @@ -159,7 +159,6 @@ def presets(cls): return copy.deepcopy(backbone_presets) @classmethod - @format_docstring(names=", ".join(backbone_presets)) def from_preset( cls, preset, From 6fb860f4bdf7894e4a9e655938f9d70fe4378bc7 Mon Sep 17 00:00:00 2001 From: abheesht17 Date: Tue, 17 Jan 2023 20:31:47 +0530 Subject: [PATCH 4/5] Minor edit --- keras_nlp/models/bert/bert_classifier.py | 2 +- keras_nlp/models/deberta_v3/deberta_v3_classifier.py | 2 +- keras_nlp/models/distil_bert/distil_bert_classifier.py | 2 +- keras_nlp/models/roberta/roberta_classifier.py | 2 +- keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py | 2 +- 5 files changed, 5 insertions(+), 5 
deletions(-) diff --git a/keras_nlp/models/bert/bert_classifier.py b/keras_nlp/models/bert/bert_classifier.py index 5027d0971b..be1a8e462a 100644 --- a/keras_nlp/models/bert/bert_classifier.py +++ b/keras_nlp/models/bert/bert_classifier.py @@ -121,9 +121,9 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone + self._preprocessor = preprocessor self.num_classes = num_classes self.dropout = dropout - self._preprocessor = preprocessor def get_config(self): config = super().get_config() diff --git a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py index acb207bca8..fdfa00ef6f 100644 --- a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py +++ b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py @@ -130,10 +130,10 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone + self._preprocessor = preprocessor self.num_classes = num_classes self.hidden_dim = hidden_dim self.dropout = dropout - self._preprocessor = preprocessor def get_config(self): config = super().get_config() diff --git a/keras_nlp/models/distil_bert/distil_bert_classifier.py b/keras_nlp/models/distil_bert/distil_bert_classifier.py index 9fdf559385..53e132d0f5 100644 --- a/keras_nlp/models/distil_bert/distil_bert_classifier.py +++ b/keras_nlp/models/distil_bert/distil_bert_classifier.py @@ -129,10 +129,10 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone + self._preprocessor = preprocessor self.num_classes = num_classes self.hidden_dim = hidden_dim self.dropout = dropout - self._preprocessor = preprocessor def get_config(self): config = super().get_config() diff --git a/keras_nlp/models/roberta/roberta_classifier.py b/keras_nlp/models/roberta/roberta_classifier.py index 86fb45fe5e..212fe33923 100644 --- a/keras_nlp/models/roberta/roberta_classifier.py +++ b/keras_nlp/models/roberta/roberta_classifier.py @@ -126,10 +126,10 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone + self._preprocessor = preprocessor self.num_classes = num_classes self.hidden_dim = hidden_dim self.dropout = dropout - self._preprocessor = preprocessor def get_config(self): config = super().get_config() diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py index 7c385d910d..bb50640f15 100644 --- a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py +++ b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py @@ -125,10 +125,10 @@ def __init__( ) # All references to `self` below this line self._backbone = backbone + self._preprocessor = preprocessor self.num_classes = num_classes self.hidden_dim = hidden_dim self.dropout = dropout - self._preprocessor = preprocessor def preprocess_samples(self, x, y=None, sample_weight=None): return self.preprocessor(x, y=y, sample_weight=sample_weight) From 8724f8127d7e0010dcfee2d0728ff3f384d7d36b Mon Sep 17 00:00:00 2001 From: abheesht17 Date: Wed, 18 Jan 2023 10:03:31 +0530 Subject: [PATCH 5/5] Address comments --- keras_nlp/models/bert/bert_classifier.py | 94 ++++++++++++++----- .../deberta_v3/deberta_v3_classifier.py | 93 +++++++++++++----- .../distil_bert/distil_bert_classifier.py | 94 ++++++++++++++----- .../models/roberta/roberta_classifier.py | 92 +++++++++++++----- keras_nlp/models/task.py | 29 +++++- .../xlm_roberta/xlm_roberta_classifier.py | 92 +++++++++++++----- 6 files changed, 367 insertions(+), 127 deletions(-) diff --git 
a/keras_nlp/models/bert/bert_classifier.py b/keras_nlp/models/bert/bert_classifier.py index be1a8e462a..5d555a745c 100644 --- a/keras_nlp/models/bert/bert_classifier.py +++ b/keras_nlp/models/bert/bert_classifier.py @@ -24,9 +24,6 @@ from keras_nlp.models.bert.bert_presets import classifier_presets from keras_nlp.models.task import Task from keras_nlp.utils.python_utils import classproperty -from keras_nlp.utils.python_utils import format_docstring - -PRESET_NAMES = ", ".join(list(backbone_presets) + list(classifier_presets)) @keras.utils.register_keras_serializable(package="keras_nlp") @@ -57,8 +54,9 @@ class BertClassifier(Task): Examples: + Example usage. ```python - # Call classifier on the inputs. + # Define the preprocessed inputs. preprocessed_features = { "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), "segment_ids": tf.constant( @@ -94,6 +92,73 @@ class BertClassifier(Task): # Access backbone programatically (e.g., to change `trainable`) classifier.backbone.trainable = False ``` + + Raw string inputs. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Create a BertClassifier and fit your data. + classifier = keras_nlp.models.BertClassifier.from_preset( + "bert_base_en_uncased", + num_classes=4, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Raw string inputs with customized preprocessing. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Use a shorter sequence length. + preprocessor = keras_nlp.models.BertPreprocessor.from_preset( + "bert_base_en_uncased", + sequence_length=128, + ) + + # Create a BertClassifier and fit your data. + classifier = keras_nlp.models.BertClassifier.from_preset( + "bert_base_en_uncased", + num_classes=4, + preprocessor=preprocessor, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Preprocessed inputs. + ```python + # Create a dataset with preprocessed features in an `(x, y)` format. + preprocessed_features = { + "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), + "segment_ids": tf.constant( + [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) + ), + "padding_mask": tf.constant( + [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) + ), + } + labels = [0, 3] + + # Create a BERT classifier and fit your data. 
+ classifier = keras_nlp.models.BertClassifier.from_preset( + "bert_base_en_uncased", + num_classes=4, + preprocessor=None, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=preprocessed_features, y=labels, batch_size=2) + ``` """ def __init__( @@ -131,8 +196,6 @@ def get_config(self): { "num_classes": self.num_classes, "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, } ) return config @@ -148,22 +211,3 @@ def preprocessor_cls(cls): @classproperty def presets(cls): return copy.deepcopy({**backbone_presets, **classifier_presets}) - - @classmethod - def from_preset( - cls, - preset, - load_weights=True, - **kwargs, - ): - return super().from_preset( - preset=preset, load_weights=load_weights, **kwargs - ) - - -BertClassifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ -format_docstring( - model_task_name=BertClassifier.__name__, - example_preset_name="bert_base_en_uncased", - preset_names=PRESET_NAMES, -)(BertClassifier.from_preset.__func__) diff --git a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py index fdfa00ef6f..ab078ebda8 100644 --- a/keras_nlp/models/deberta_v3/deberta_v3_classifier.py +++ b/keras_nlp/models/deberta_v3/deberta_v3_classifier.py @@ -27,9 +27,6 @@ from keras_nlp.models.deberta_v3.deberta_v3_presets import backbone_presets from keras_nlp.models.task import Task from keras_nlp.utils.python_utils import classproperty -from keras_nlp.utils.python_utils import format_docstring - -PRESET_NAMES = ", ".join(list(backbone_presets)) @keras.utils.register_keras_serializable(package="keras_nlp") @@ -61,8 +58,11 @@ class DebertaV3Classifier(Task): `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: + + Example usage. ```python + # Define the preprocessed inputs. preprocessed_features = { "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), "padding_mask": tf.constant( @@ -92,6 +92,70 @@ class DebertaV3Classifier(Task): # Access backbone programatically (e.g., to change `trainable`) classifier.backbone.trainable = False ``` + + Raw string inputs. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Create a DebertaV3Classifier and fit your data. + classifier = keras_nlp.models.DebertaV3Classifier.from_preset( + "deberta_v3_base_en", + num_classes=4, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Raw string inputs with customized preprocessing. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Use a shorter sequence length. + preprocessor = keras_nlp.models.DebertaV3Preprocessor.from_preset( + "deberta_v3_base_en", + sequence_length=128, + ) + + # Create a DebertaV3Classifier and fit your data. + classifier = keras_nlp.models.DebertaV3Classifier.from_preset( + "deberta_v3_base_en", + num_classes=4, + preprocessor=preprocessor, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Preprocessed inputs. 
+ ```python + # Create a dataset with preprocessed features in an `(x, y)` format. + preprocessed_features = { + "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), + "padding_mask": tf.constant( + [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) + ), + } + labels = [0, 3] + + # Create a DebertaV3Classifier and fit your data. + classifier = keras_nlp.models.DebertaV3Classifier.from_preset( + "deberta_v3_base_en", + num_classes=4, + preprocessor=None, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=preprocessed_features, y=labels, batch_size=2) + ``` """ def __init__( @@ -142,8 +206,6 @@ def get_config(self): "num_classes": self.num_classes, "hidden_dim": self.hidden_dim, "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, } ) return config @@ -159,22 +221,3 @@ def preprocessor_cls(cls): @classproperty def presets(cls): return copy.deepcopy(backbone_presets) - - @classmethod - def from_preset( - cls, - preset, - load_weights=True, - **kwargs, - ): - return super().from_preset( - preset=preset, load_weights=load_weights, **kwargs - ) - - -DebertaV3Classifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ -format_docstring( - model_task_name=DebertaV3Classifier.__name__, - example_preset_name="deberta_v3_base_en", - preset_names=PRESET_NAMES, -)(DebertaV3Classifier.from_preset.__func__) diff --git a/keras_nlp/models/distil_bert/distil_bert_classifier.py b/keras_nlp/models/distil_bert/distil_bert_classifier.py index 53e132d0f5..06d5202e14 100644 --- a/keras_nlp/models/distil_bert/distil_bert_classifier.py +++ b/keras_nlp/models/distil_bert/distil_bert_classifier.py @@ -27,9 +27,6 @@ from keras_nlp.models.distil_bert.distil_bert_presets import backbone_presets from keras_nlp.models.task import Task from keras_nlp.utils.python_utils import classproperty -from keras_nlp.utils.python_utils import format_docstring - -PRESET_NAMES = ", ".join(list(backbone_presets)) @keras.utils.register_keras_serializable(package="keras_nlp") @@ -61,7 +58,9 @@ class DistilBertClassifier(Task): `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: + + Example usage. ```python preprocessed_features = { "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), @@ -91,6 +90,72 @@ class DistilBertClassifier(Task): # Access backbone programatically (e.g., to change `trainable`) classifier.backbone.trainable = False ``` + + Raw string inputs. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Create a DistilBertClassifier and fit your data. + classifier = keras_nlp.models.DistilBertClassifier.from_preset( + "distil_bert_base_en_uncased", + num_classes=4, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Raw string inputs with customized preprocessing. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Use a shorter sequence length. + preprocessor = keras_nlp.models.DistilBertPreprocessor.from_preset( + "distil_bert_base_en_uncased", + sequence_length=128, + ) + # Create a DistilBertClassifier and fit your data. 
+ classifier = keras_nlp.models.DistilBertClassifier.from_preset( + "distil_bert_base_en_uncased", + num_classes=4, + preprocessor=preprocessor, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Preprocessed inputs. + ```python + # Create a dataset with preprocessed features in an `(x, y)` format. + preprocessed_features = { + "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), + "segment_ids": tf.constant( + [[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) + ), + "padding_mask": tf.constant( + [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) + ), + } + labels = [0, 3] + + # Create a DistilBERT classifier and fit your data. + classifier = keras_nlp.models.DistilBertClassifier.from_preset( + "distil_bert_base_en_uncased", + num_classes=4, + preprocessor=None, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=preprocessed_features, y=labels, batch_size=2) + ``` """ def __init__( @@ -141,8 +206,6 @@ def get_config(self): "num_classes": self.num_classes, "hidden_dim": self.hidden_dim, "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, } ) return config @@ -158,22 +221,3 @@ def preprocessor_cls(cls): @classproperty def presets(cls): return copy.deepcopy(backbone_presets) - - @classmethod - def from_preset( - cls, - preset, - load_weights=True, - **kwargs, - ): - return super().from_preset( - preset=preset, load_weights=load_weights, **kwargs - ) - - -DistilBertClassifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ -format_docstring( - model_task_name=DistilBertClassifier.__name__, - example_preset_name="distil_bert_base_en_uncased", - preset_names=PRESET_NAMES, -)(DistilBertClassifier.from_preset.__func__) diff --git a/keras_nlp/models/roberta/roberta_classifier.py b/keras_nlp/models/roberta/roberta_classifier.py index 212fe33923..e79ff844c0 100644 --- a/keras_nlp/models/roberta/roberta_classifier.py +++ b/keras_nlp/models/roberta/roberta_classifier.py @@ -23,9 +23,6 @@ from keras_nlp.models.roberta.roberta_presets import backbone_presets from keras_nlp.models.task import Task from keras_nlp.utils.python_utils import classproperty -from keras_nlp.utils.python_utils import format_docstring - -PRESET_NAMES = ", ".join(list(backbone_presets)) @keras.utils.register_keras_serializable(package="keras_nlp") @@ -57,7 +54,9 @@ class RobertaClassifier(Task): `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: + + Example usage. ```python preprocessed_features = { "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), @@ -90,6 +89,70 @@ class RobertaClassifier(Task): # Access backbone programatically (e.g., to change `trainable`) classifier.backbone.trainable = False ``` + + Raw string inputs. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Create a RobertaClassifier and fit your data. + classifier = keras_nlp.models.RobertaClassifier.from_preset( + "roberta_base_en", + num_classes=4, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Raw string inputs with customized preprocessing. 
+ ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Use a shorter sequence length. + preprocessor = keras_nlp.models.RobertaPreprocessor.from_preset( + "roberta_base_en", + sequence_length=128, + ) + + # Create a RobertaClassifier and fit your data. + classifier = keras_nlp.models.RobertaClassifier.from_preset( + "roberta_base_en", + num_classes=4, + preprocessor=preprocessor, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Preprocessed inputs. + ```python + # Create a dataset with preprocessed features in an `(x, y)` format. + preprocessed_features = { + "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), + "padding_mask": tf.constant( + [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) + ), + } + labels = [0, 3] + + # Create a RoBERTa classifier and fit your data. + classifier = keras_nlp.models.RobertaClassifier.from_preset( + "roberta_base_en", + num_classes=4, + preprocessor=None, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=preprocessed_features, y=labels, batch_size=2) + ``` """ def __init__( @@ -138,8 +201,6 @@ def get_config(self): "num_classes": self.num_classes, "hidden_dim": self.hidden_dim, "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, } ) return config @@ -155,22 +216,3 @@ def preprocessor_cls(cls): @classproperty def presets(cls): return copy.deepcopy(backbone_presets) - - @classmethod - def from_preset( - cls, - preset, - load_weights=True, - **kwargs, - ): - return super().from_preset( - preset=preset, load_weights=load_weights, **kwargs - ) - - -RobertaClassifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ -format_docstring( - model_task_name=RobertaClassifier.__name__, - example_preset_name="roberta_base_en", - preset_names=PRESET_NAMES, -)(RobertaClassifier.from_preset.__func__) diff --git a/keras_nlp/models/task.py b/keras_nlp/models/task.py index 89465bf0fc..5fffe5218a 100644 --- a/keras_nlp/models/task.py +++ b/keras_nlp/models/task.py @@ -19,6 +19,7 @@ from keras_nlp.utils.pipeline_model import PipelineModel from keras_nlp.utils.python_utils import classproperty +from keras_nlp.utils.python_utils import format_docstring @keras.utils.register_keras_serializable(package="keras_nlp") @@ -30,18 +31,20 @@ def preprocess_samples(self, x, y=None, sample_weight=None): @property def backbone(self): - """A `keras_nlp.models.backbone.Backbone` instance providing the encoder submodel.""" + """A `keras.Model` instance providing the backbone submodel.""" return self._backbone @property def preprocessor(self): - """A `keras_nlp.models.preprocessor.Preprocessor` instance for preprocessing inputs.""" + """A `keras.layers.Layer` instance used to preprocess inputs.""" return self._preprocessor def get_config(self): return { "backbone": keras.layers.serialize(self.backbone), "preprocessor": keras.layers.serialize(self.preprocessor), + "name": self.name, + "trainable": self.trainable, } @classmethod @@ -130,3 +133,25 @@ def from_preset( model.load_weights(weights) return model + + def __init_subclass__(cls, **kwargs): + # Use __init_subclass__ to setup a correct docstring for from_preset. 
+ super().__init_subclass__(**kwargs) + + # If the subclass does not define `from_preset`, assign a wrapper so that + # each class can have a distinct docstring. + if "from_preset" not in cls.__dict__: + + def from_preset(calling_cls, *args, **kwargs): + return super(cls, calling_cls).from_preset(*args, **kwargs) + + cls.from_preset = classmethod(from_preset) + + # Format and assign the docstring unless the subclass has overridden it. + if cls.from_preset.__doc__ is None: + cls.from_preset.__func__.__doc__ = Task.from_preset.__doc__ + format_docstring( + model_task_name=cls.__name__, + example_preset_name=next(iter(cls.presets), ""), + preset_names='", "'.join(cls.presets), + )(cls.from_preset.__func__) diff --git a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py index bb50640f15..35c5e89254 100644 --- a/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py +++ b/keras_nlp/models/xlm_roberta/xlm_roberta_classifier.py @@ -25,9 +25,6 @@ ) from keras_nlp.models.xlm_roberta.xlm_roberta_presets import backbone_presets from keras_nlp.utils.python_utils import classproperty -from keras_nlp.utils.python_utils import format_docstring - -PRESET_NAMES = ", ".join(list(backbone_presets)) @keras.utils.register_keras_serializable(package="keras_nlp") @@ -59,7 +56,9 @@ class XLMRobertaClassifier(Task): `None`, this model will not apply preprocessing, and inputs should be preprocessed before calling the model. - Example usage: + Examples: + + Example usage. ```python preprocessed_features = { "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), @@ -89,6 +88,70 @@ class XLMRobertaClassifier(Task): # Access backbone programatically (e.g., to change `trainable`) classifier.backbone.trainable = False ``` + + Raw string inputs. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Create a XLMRobertaClassifier and fit your data. + classifier = keras_nlp.models.XLMRobertaClassifier.from_preset( + "xlm_roberta_base_multi", + num_classes=4, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Raw string inputs with customized preprocessing. + ```python + # Create a dataset with raw string features in an `(x, y)` format. + features = ["The quick brown fox jumped.", "I forgot my homework."] + labels = [0, 3] + + # Use a shorter sequence length. + preprocessor = keras_nlp.models.XLMRobertaPreprocessor.from_preset( + "xlm_roberta_base_multi", + sequence_length=128, + ) + + # Create a XLMRobertaClassifier and fit your data. + classifier = keras_nlp.models.XLMRobertaClassifier.from_preset( + "xlm_roberta_base_multi", + num_classes=4, + preprocessor=preprocessor, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=features, y=labels, batch_size=2) + ``` + + Preprocessed inputs. + ```python + # Create a dataset with preprocessed features in an `(x, y)` format. + preprocessed_features = { + "token_ids": tf.ones(shape=(2, 12), dtype=tf.int64), + "padding_mask": tf.constant( + [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]] * 2, shape=(2, 12) + ), + } + labels = [0, 3] + + # Create a XLMRobertaClassifier and fit your data. 
+ classifier = keras_nlp.models.XLMRobertaClassifier.from_preset( + "xlm_roberta_base_multi", + num_classes=4, + preprocessor=None, + ) + classifier.compile( + loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), + ) + classifier.fit(x=preprocessed_features, y=labels, batch_size=2) + ``` """ def __init__( @@ -140,8 +203,6 @@ def get_config(self): "num_classes": self.num_classes, "hidden_dim": self.hidden_dim, "dropout": self.dropout, - "name": self.name, - "trainable": self.trainable, } ) return config @@ -157,22 +218,3 @@ def preprocessor_cls(cls): @classproperty def presets(cls): return copy.deepcopy(backbone_presets) - - @classmethod - def from_preset( - cls, - preset, - load_weights=True, - **kwargs, - ): - return super().from_preset( - preset=preset, load_weights=load_weights, **kwargs - ) - - -XLMRobertaClassifier.from_preset.__func__.__doc__ = Task.from_preset.__doc__ -format_docstring( - model_task_name=XLMRobertaClassifier.__name__, - example_preset_name="xlm_roberta_base_multi", - preset_names=PRESET_NAMES, -)(XLMRobertaClassifier.from_preset.__func__)
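
Notes on the series as a whole. The net effect is that every classifier now funnels through the shared `Task.from_preset` added in patch 1. A minimal sketch of the resulting user-facing flow, assuming the `bert_base_en_uncased` preset weights can be fetched; this mirrors the docstring examples in the patches rather than introducing any new API:

```python
import keras_nlp
from tensorflow import keras

# `from_preset` now lives on the `Task` base class: it validates the preset
# name, builds a matching `BertPreprocessor` unless `preprocessor=None` is
# passed, and constructs the backbone from the preset config.
classifier = keras_nlp.models.BertClassifier.from_preset(
    "bert_base_en_uncased",
    num_classes=4,
)
classifier.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
)

# Raw strings are accepted directly because `Task.preprocess_samples` runs
# the attached preprocessor inside `fit()`, `predict()`, and `evaluate()`.
features = ["The quick brown fox jumped.", "I forgot my homework."]
labels = [0, 3]
classifier.fit(x=features, y=labels, batch_size=2)

# Architecture-only loading is the same call with `load_weights=False`.
random_init = keras_nlp.models.BertClassifier.from_preset(
    "bert_base_en_uncased",
    num_classes=4,
    load_weights=False,
)
```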
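
Patch 5's `Task.__init_subclass__` is the subtle piece: it gives each subclass its own `from_preset` function object so the templated docstring can be formatted per class without mutating the base class's template. A self-contained sketch of the same pattern, simplified to plain `str.format` in place of `format_docstring`; the `Base`/`Child` names are illustrative only:

```python
class Base:
    presets = {"preset_a": {}, "preset_b": {}}

    @classmethod
    def from_preset(cls, preset, **kwargs):
        """Instantiate {name} from a preset. Must be one of: {presets}."""
        return cls(**kwargs)

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Give the subclass its own `from_preset` function object; otherwise
        # formatting the docstring would mutate the one shared with `Base`
        # and with every sibling subclass.
        if "from_preset" not in cls.__dict__:

            def from_preset(calling_cls, *args, **kwargs):
                return super(cls, calling_cls).from_preset(*args, **kwargs)

            cls.from_preset = classmethod(from_preset)

        # Always format from the pristine template on `Base`.
        cls.from_preset.__func__.__doc__ = Base.from_preset.__doc__.format(
            name=cls.__name__, presets=", ".join(cls.presets)
        )


class Child(Base):
    pass


print(Child.from_preset.__doc__)
# Instantiate Child from a preset. Must be one of: preset_a, preset_b.
print(Base.from_preset.__doc__)
# Instantiate {name} from a preset. Must be one of: {presets}.
```

The real implementation additionally guards with `if cls.from_preset.__doc__ is None:` so a subclass that defines its own documented `from_preset` (as `BertClassifier` could) is left untouched.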
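
Finally, the `backbone_cls`, `preprocessor_cls`, and `presets` hooks all rely on the `classproperty` helper imported from `keras_nlp.utils.python_utils`. A sketch of that descriptor idiom in isolation — the general pattern, not necessarily a verbatim copy of the helper:

```python
class classproperty(property):
    """A `property` readable on the class itself, no instance required."""

    def __get__(self, _, owner_cls):
        # Ignore the instance (if any) and hand the owning class to the getter.
        return self.fget(owner_cls)


class Example:
    @classproperty
    def backbone_cls(cls):
        return dict  # stand-in for a real backbone class


print(Example.backbone_cls)  # <class 'dict'>, resolved without an instance
```

This is what lets `Task.from_preset` stay generic: it can ask `cls.backbone_cls` and `cls.preprocessor_cls` at the class level, and each concrete task only has to override those three properties to get preset loading and the templated docstring for free.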