diff --git a/changelog/5635.improvement.rst b/changelog/5635.improvement.rst new file mode 100644 index 000000000000..5d07d4fb5573 --- /dev/null +++ b/changelog/5635.improvement.rst @@ -0,0 +1 @@ +Update dependencies based on the ``dependabot`` check. diff --git a/changelog/5636.improvement.rst b/changelog/5636.improvement.rst index eb70e1242e36..ac8573c95f6a 100644 --- a/changelog/5636.improvement.rst +++ b/changelog/5636.improvement.rst @@ -1 +1,2 @@ -Update dependencies based on the ``dependabot`` check. \ No newline at end of file +Add dropout between ``FFNN`` and ``DenseForSparse`` layers in ``DIETClassifier``, +``ResponseSelector`` and ``EmbeddingIntentClassifier`` controlled by the ``use_dense_input_dropout`` config parameter. diff --git a/docs/nlu/components.rst b/docs/nlu/components.rst index 1546bfc988ad..f366e2927fee 100644 --- a/docs/nlu/components.rst +++ b/docs/nlu/components.rst @@ -922,7 +922,9 @@ EmbeddingIntentClassifier | drop_rate | 0.2 | Dropout rate for encoder. Value should be between 0 and 1. | | | | The higher the value the higher the regularization effect. | +---------------------------------+------------------+--------------------------------------------------------------+ - | use_sparse_input_dropout | True | If 'True' apply dropout to sparse tensors. | + | use_sparse_input_dropout | False | If 'True' apply dropout to sparse input tensors. | + +---------------------------------+------------------+--------------------------------------------------------------+ + | use_dense_input_dropout | False | If 'True' apply dropout to dense input tensors. | +---------------------------------+------------------+--------------------------------------------------------------+ | evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. | | | | Set to '-1' to evaluate just once at the end of training. | @@ -1486,7 +1488,9 @@ ResponseSelector | drop_rate_attention | 0.0 | Dropout rate for attention. Value should be between 0 and 1. 
| | | | The higher the value the higher the regularization effect. | +---------------------------------+-------------------+--------------------------------------------------------------+ - | use_sparse_input_dropout | False | If 'True' apply dropout to sparse tensors. | + | use_sparse_input_dropout | False | If 'True' apply dropout to sparse input tensors. | + +---------------------------------+-------------------+--------------------------------------------------------------+ + | use_dense_input_dropout | False | If 'True' apply dropout to dense input tensors. | +---------------------------------+-------------------+--------------------------------------------------------------+ | evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. | | | | Set to '-1' to evaluate just once at the end of training. | @@ -1715,7 +1719,9 @@ DIETClassifier | drop_rate_attention | 0.0 | Dropout rate for attention. Value should be between 0 and 1. | | | | The higher the value the higher the regularization effect. | +---------------------------------+------------------+--------------------------------------------------------------+ - | use_sparse_input_dropout | True | If 'True' apply dropout to sparse tensors. | + | use_sparse_input_dropout | True | If 'True' apply dropout to sparse input tensors. | + +---------------------------------+------------------+--------------------------------------------------------------+ + | use_dense_input_dropout | True | If 'True' apply dropout to dense input tensors. | +---------------------------------+------------------+--------------------------------------------------------------+ | evaluate_every_number_of_epochs | 20 | How often to calculate validation accuracy. | | | | Set to '-1' to evaluate just once at the end of training. 
| diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 17c996275fec..9b90f7905756 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -55,6 +55,7 @@ SIMILARITY_TYPE, NUM_NEG, SPARSE_INPUT_DROPOUT, + DENSE_INPUT_DROPOUT, MASKED_LM, ENTITY_RECOGNITION, TENSORBOARD_LOG_DIR, @@ -188,8 +189,10 @@ def required_components(cls) -> List[Type[Component]]: DROP_RATE_ATTENTION: 0, # Sparsity of the weights in dense layers WEIGHT_SPARSITY: 0.8, - # If 'True' apply dropout to sparse tensors + # If 'True' apply dropout to sparse input tensors SPARSE_INPUT_DROPOUT: True, + # If 'True' apply dropout to dense input tensors + DENSE_INPUT_DROPOUT: True, # ## Evaluation parameters # How often calculate validation accuracy. # Small values may hurt performance, e.g. model accuracy. @@ -1075,7 +1078,10 @@ def _prepare_sparse_dense_layers( ) def _prepare_input_layers(self, name: Text) -> None: - self._tf_layers[f"sparse_dropout.{name}"] = layers.SparseDropout( + self._tf_layers[f"sparse_input_dropout.{name}"] = layers.SparseDropout( + rate=self.config[DROP_RATE] + ) + self._tf_layers[f"dense_input_dropout.{name}"] = tf.keras.layers.Dropout( rate=self.config[DROP_RATE] ) self._prepare_sparse_dense_layers( @@ -1172,6 +1178,7 @@ def _combine_sparse_dense_features( mask: tf.Tensor, name: Text, sparse_dropout: bool = False, + dense_dropout: bool = False, ) -> tf.Tensor: dense_features = [] @@ -1179,14 +1186,22 @@ def _combine_sparse_dense_features( for f in features: if isinstance(f, tf.SparseTensor): if sparse_dropout: - _f = self._tf_layers[f"sparse_dropout.{name}"](f, self._training) + _f = self._tf_layers[f"sparse_input_dropout.{name}"]( + f, self._training + ) else: _f = f dense_features.append(self._tf_layers[f"sparse_to_dense.{name}"](_f)) else: dense_features.append(f) - return tf.concat(dense_features, axis=-1) * mask + outputs = tf.concat(dense_features, axis=-1) * mask + if 
dense_dropout: + outputs = self._tf_layers[f"dense_input_dropout.{name}"]( + outputs, self._training + ) + + return outputs def _features_as_seq_ids( self, features: List[Union[np.ndarray, tf.Tensor, tf.SparseTensor]], name: Text @@ -1213,9 +1228,12 @@ def _create_bow( mask: tf.Tensor, name: Text, sparse_dropout: bool = False, + dense_dropout: bool = False, ) -> tf.Tensor: - x = self._combine_sparse_dense_features(features, mask, name, sparse_dropout) + x = self._combine_sparse_dense_features( + features, mask, name, sparse_dropout, dense_dropout + ) x = tf.reduce_sum(x, axis=1) # convert to bag-of-words return self._tf_layers[f"ffnn.{name}"](x, self._training) @@ -1224,6 +1242,8 @@ def _create_sequence( features: List[Union[tf.Tensor, tf.SparseTensor]], mask: tf.Tensor, name: Text, + sparse_dropout: bool = False, + dense_dropout: bool = False, masked_lm_loss: bool = False, sequence_ids: bool = False, ) -> Tuple[tf.Tensor, tf.Tensor, Optional[tf.Tensor], Optional[tf.Tensor]]: @@ -1233,7 +1253,7 @@ def _create_sequence( seq_ids = None inputs = self._combine_sparse_dense_features( - features, mask, name, sparse_dropout=self.config[SPARSE_INPUT_DROPOUT] + features, mask, name, sparse_dropout, dense_dropout, ) inputs = self._tf_layers[f"ffnn.{name}"](inputs, self._training) @@ -1387,7 +1407,9 @@ def batch_loss( tf_batch_data[TEXT_FEATURES], mask_text, self.text_name, - self.config[MASKED_LM], + sparse_dropout=self.config[SPARSE_INPUT_DROPOUT], + dense_dropout=self.config[DENSE_INPUT_DROPOUT], + masked_lm_loss=self.config[MASKED_LM], sequence_ids=True, ) diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py index 4886c987f3e7..d6cd4244d298 100644 --- a/rasa/nlu/classifiers/embedding_intent_classifier.py +++ b/rasa/nlu/classifiers/embedding_intent_classifier.py @@ -22,6 +22,7 @@ SIMILARITY_TYPE, NUM_NEG, SPARSE_INPUT_DROPOUT, + DENSE_INPUT_DROPOUT, MASKED_LM, ENTITY_RECOGNITION, INTENT_CLASSIFICATION, @@ 
-127,6 +128,8 @@ def required_components(cls) -> List[Type[Component]]: WEIGHT_SPARSITY: 0.0, # If 'True' apply dropout to sparse tensors SPARSE_INPUT_DROPOUT: False, + # If 'True' apply dropout to dense input tensors + DENSE_INPUT_DROPOUT: False, # ## Evaluation parameters # How often calculate validation accuracy. # Small values may hurt performance, e.g. model accuracy. diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index d910c582bfe2..bc9ea4e3fba2 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -39,6 +39,7 @@ SIMILARITY_TYPE, NUM_NEG, SPARSE_INPUT_DROPOUT, + DENSE_INPUT_DROPOUT, MASKED_LM, ENTITY_RECOGNITION, INTENT_CLASSIFICATION, @@ -179,8 +180,10 @@ def required_components(cls) -> List[Type[Component]]: DROP_RATE: 0.2, # Dropout rate for attention DROP_RATE_ATTENTION: 0, - # If 'True' apply dropout to sparse tensors + # If 'True' apply dropout to sparse input tensors SPARSE_INPUT_DROPOUT: False, + # If 'True' apply dropout to dense input tensors + DENSE_INPUT_DROPOUT: False, # ## Evaluation parameters # How often calculate validation accuracy. # Small values may hurt performance, e.g. model accuracy. @@ -467,7 +470,9 @@ def batch_loss( tf_batch_data[TEXT_FEATURES], mask_text, self.text_name, - self.config[MASKED_LM], + sparse_dropout=self.config[SPARSE_INPUT_DROPOUT], + dense_dropout=self.config[DENSE_INPUT_DROPOUT], + masked_lm_loss=self.config[MASKED_LM], sequence_ids=True, ) diff --git a/rasa/utils/tensorflow/constants.py b/rasa/utils/tensorflow/constants.py index 8563ff8d7c19..b2398de45711 100644 --- a/rasa/utils/tensorflow/constants.py +++ b/rasa/utils/tensorflow/constants.py @@ -46,6 +46,7 @@ MASKED_LM = "use_masked_language_model" SPARSE_INPUT_DROPOUT = "use_sparse_input_dropout" +DENSE_INPUT_DROPOUT = "use_dense_input_dropout" RANKING_LENGTH = "ranking_length"