From 0f0b7d0f75e39ff27e69ed7a6a91650fc6fd0745 Mon Sep 17 00:00:00 2001
From: Somasree
Date: Mon, 16 Jan 2023 06:17:57 +0530
Subject: [PATCH 1/6] Corrected the epsilon value

---
 keras_nlp/layers/transformer_encoder.py      | 2 +-
 keras_nlp/layers/transformer_encoder_test.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py
index 26b27fb2b3..e5415bc5f6 100644
--- a/keras_nlp/layers/transformer_encoder.py
+++ b/keras_nlp/layers/transformer_encoder.py
@@ -87,7 +87,7 @@ def __init__(
         num_heads,
         dropout=0,
         activation="relu",
-        layer_norm_epsilon=1e-05,
+        layer_norm_epsilon=1e-12,
         kernel_initializer="glorot_uniform",
         bias_initializer="zeros",
         normalize_first=False,
diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py
index a95766441b..1b4b8cf7c4 100644
--- a/keras_nlp/layers/transformer_encoder_test.py
+++ b/keras_nlp/layers/transformer_encoder_test.py
@@ -68,7 +68,7 @@ def test_get_config_and_from_config(self):
             "num_heads": 2,
             "dropout": 0,
             "activation": "relu",
-            "layer_norm_epsilon": 1e-05,
+            "layer_norm_epsilon": 1e-12,
             "kernel_initializer": keras.initializers.serialize(
                 keras.initializers.HeNormal()
             ),

From 50441fc82c9a8c5f672e0a6b0d0f9b8892728e9e Mon Sep 17 00:00:00 2001
From: Somasree
Date: Mon, 16 Jan 2023 06:35:58 +0530
Subject: [PATCH 2/6] Corrected the epsilon value

---
 keras_nlp/layers/transformer_encoder.py      | 2 +-
 keras_nlp/layers/transformer_encoder_test.py | 2 +-
 keras_nlp/models/bert/bert_backbone.py       | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/keras_nlp/layers/transformer_encoder.py b/keras_nlp/layers/transformer_encoder.py
index e5415bc5f6..26b27fb2b3 100644
--- a/keras_nlp/layers/transformer_encoder.py
+++ b/keras_nlp/layers/transformer_encoder.py
@@ -87,7 +87,7 @@ def __init__(
         num_heads,
         dropout=0,
         activation="relu",
-        layer_norm_epsilon=1e-12,
+        layer_norm_epsilon=1e-05,
         kernel_initializer="glorot_uniform",
         bias_initializer="zeros",
         normalize_first=False,
diff --git a/keras_nlp/layers/transformer_encoder_test.py b/keras_nlp/layers/transformer_encoder_test.py
index 1b4b8cf7c4..a95766441b 100644
--- a/keras_nlp/layers/transformer_encoder_test.py
+++ b/keras_nlp/layers/transformer_encoder_test.py
@@ -68,7 +68,7 @@ def test_get_config_and_from_config(self):
             "num_heads": 2,
             "dropout": 0,
             "activation": "relu",
-            "layer_norm_epsilon": 1e-12,
+            "layer_norm_epsilon": 1e-05,
             "kernel_initializer": keras.initializers.serialize(
                 keras.initializers.HeNormal()
             ),
diff --git a/keras_nlp/models/bert/bert_backbone.py b/keras_nlp/models/bert/bert_backbone.py
index 6fb8a25e70..2fee35f513 100644
--- a/keras_nlp/models/bert/bert_backbone.py
+++ b/keras_nlp/models/bert/bert_backbone.py
@@ -163,6 +163,7 @@ def __init__(
                     x, approximate=True
                 ),
                 dropout=dropout,
+                epsilon=1e-12,
                 kernel_initializer=bert_kernel_initializer(),
                 name=f"transformer_layer_{i}",
             )(x, padding_mask=padding_mask)

From 1cf99ee5ab88e91dffe4c1c5670dca9d6f8f34f0 Mon Sep 17 00:00:00 2001
From: Somasree
Date: Mon, 16 Jan 2023 10:04:06 +0530
Subject: [PATCH 3/6] Corrected the epsilon value

---
 keras_nlp/models/bert/bert_backbone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/models/bert/bert_backbone.py b/keras_nlp/models/bert/bert_backbone.py
index 2fee35f513..32ef2d7101 100644
--- a/keras_nlp/models/bert/bert_backbone.py
+++ b/keras_nlp/models/bert/bert_backbone.py
@@ -163,7 +163,7 @@ def __init__(
                     x, approximate=True
                 ),
                 dropout=dropout,
-                epsilon=1e-12,
+                layer_norm_epilson=1e-12,
                 kernel_initializer=bert_kernel_initializer(),
                 name=f"transformer_layer_{i}",
             )(x, padding_mask=padding_mask)

From 2754375339407295ff0378a4300a4e672c837cb2 Mon Sep 17 00:00:00 2001
From: Somasree
Date: Mon, 16 Jan 2023 10:12:07 +0530
Subject: [PATCH 4/6] Corrected the epsilon value

---
 keras_nlp/models/bert/bert_backbone.py | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/keras_nlp/models/bert/bert_backbone.py b/keras_nlp/models/bert/bert_backbone.py
index 32ef2d7101..7b68f3d3c1 100644
--- a/keras_nlp/models/bert/bert_backbone.py
+++ b/keras_nlp/models/bert/bert_backbone.py
@@ -109,15 +109,9 @@ def __init__(
         # Index of classification token in the vocabulary
         cls_token_index = 0
         # Inputs
-        token_id_input = keras.Input(
-            shape=(None,), dtype="int32", name="token_ids"
-        )
-        segment_id_input = keras.Input(
-            shape=(None,), dtype="int32", name="segment_ids"
-        )
-        padding_mask = keras.Input(
-            shape=(None,), dtype="int32", name="padding_mask"
-        )
+        token_id_input = keras.Input(shape=(None,), dtype="int32", name="token_ids")
+        segment_id_input = keras.Input(shape=(None,), dtype="int32", name="segment_ids")
+        padding_mask = keras.Input(shape=(None,), dtype="int32", name="padding_mask")

         # Embed tokens, positions, and segment ids.
         token_embedding_layer = keras.layers.Embedding(
@@ -140,9 +134,7 @@ def __init__(
         )(segment_id_input)

         # Sum, normalize and apply dropout to embeddings.
-        x = keras.layers.Add()(
-            (token_embedding, position_embedding, segment_embedding)
-        )
+        x = keras.layers.Add()((token_embedding, position_embedding, segment_embedding))
         x = keras.layers.LayerNormalization(
             name="embeddings_layer_norm",
             axis=-1,
@@ -159,9 +151,7 @@ def __init__(
             x = TransformerEncoder(
                 num_heads=num_heads,
                 intermediate_dim=intermediate_dim,
-                activation=lambda x: keras.activations.gelu(
-                    x, approximate=True
-                ),
+                activation=lambda x: keras.activations.gelu(x, approximate=True),
                 dropout=dropout,
                 layer_norm_epilson=1e-12,
                 kernel_initializer=bert_kernel_initializer(),

From 9255050c2c618791fe36f41891c472d9316271d0 Mon Sep 17 00:00:00 2001
From: Somasree
Date: Mon, 16 Jan 2023 12:10:18 +0530
Subject: [PATCH 5/6] Corrected the epsilon value

---
 keras_nlp/models/bert/bert_backbone.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/keras_nlp/models/bert/bert_backbone.py b/keras_nlp/models/bert/bert_backbone.py
index 7b68f3d3c1..32ef2d7101 100644
--- a/keras_nlp/models/bert/bert_backbone.py
+++ b/keras_nlp/models/bert/bert_backbone.py
@@ -109,9 +109,15 @@ def __init__(
         # Index of classification token in the vocabulary
         cls_token_index = 0
         # Inputs
-        token_id_input = keras.Input(shape=(None,), dtype="int32", name="token_ids")
-        segment_id_input = keras.Input(shape=(None,), dtype="int32", name="segment_ids")
-        padding_mask = keras.Input(shape=(None,), dtype="int32", name="padding_mask")
+        token_id_input = keras.Input(
+            shape=(None,), dtype="int32", name="token_ids"
+        )
+        segment_id_input = keras.Input(
+            shape=(None,), dtype="int32", name="segment_ids"
+        )
+        padding_mask = keras.Input(
+            shape=(None,), dtype="int32", name="padding_mask"
+        )

         # Embed tokens, positions, and segment ids.
         token_embedding_layer = keras.layers.Embedding(
@@ -134,7 +140,9 @@ def __init__(
         )(segment_id_input)

         # Sum, normalize and apply dropout to embeddings.
-        x = keras.layers.Add()((token_embedding, position_embedding, segment_embedding))
+        x = keras.layers.Add()(
+            (token_embedding, position_embedding, segment_embedding)
+        )
         x = keras.layers.LayerNormalization(
             name="embeddings_layer_norm",
             axis=-1,
@@ -151,7 +159,9 @@ def __init__(
             x = TransformerEncoder(
                 num_heads=num_heads,
                 intermediate_dim=intermediate_dim,
-                activation=lambda x: keras.activations.gelu(x, approximate=True),
+                activation=lambda x: keras.activations.gelu(
+                    x, approximate=True
+                ),
                 dropout=dropout,
                 layer_norm_epilson=1e-12,
                 kernel_initializer=bert_kernel_initializer(),

From f9318b532b61d292fa78b8c3d92ed1f27576bf68 Mon Sep 17 00:00:00 2001
From: Somasree Majumder <56045049+soma2000-lang@users.noreply.github.com>
Date: Mon, 16 Jan 2023 19:19:11 +0530
Subject: [PATCH 6/6] Update bert_backbone.py

---
 keras_nlp/models/bert/bert_backbone.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/keras_nlp/models/bert/bert_backbone.py b/keras_nlp/models/bert/bert_backbone.py
index 32ef2d7101..e6a6eab7cd 100644
--- a/keras_nlp/models/bert/bert_backbone.py
+++ b/keras_nlp/models/bert/bert_backbone.py
@@ -163,7 +163,7 @@ def __init__(
                     x, approximate=True
                 ),
                 dropout=dropout,
-                layer_norm_epilson=1e-12,
+                layer_norm_epsilon=1e-12,
                 kernel_initializer=bert_kernel_initializer(),
                 name=f"transformer_layer_{i}",
             )(x, padding_mask=padding_mask)
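
Taken together, the six patches leave the TransformerEncoder default layer_norm_epsilon at 1e-05 (patches 1 and 2 cancel out) and instead pass 1e-12 per layer inside the BERT backbone, with patch 6 fixing the misspelled keyword. Below is a minimal sketch of that end state; it is not part of the patch series, it assumes keras_nlp and TensorFlow are installed, and the intermediate_dim, num_heads, and dropout values are illustrative only.

    import keras_nlp
    from tensorflow import keras

    # Library default is untouched: layer_norm_epsilon stays at 1e-05.
    default_encoder = keras_nlp.layers.TransformerEncoder(
        intermediate_dim=3072,
        num_heads=12,
    )

    # BERT-style usage, mirroring the final bert_backbone.py call: the epsilon
    # is overridden per layer to BERT's 1e-12 rather than by changing the
    # library-wide default.
    bert_style_encoder = keras_nlp.layers.TransformerEncoder(
        intermediate_dim=3072,
        num_heads=12,
        dropout=0.1,
        activation=lambda x: keras.activations.gelu(x, approximate=True),
        layer_norm_epsilon=1e-12,
    )

    print(default_encoder.get_config()["layer_norm_epsilon"])     # 1e-05
    print(bert_style_encoder.get_config()["layer_norm_epsilon"])  # 1e-12

Overriding the value at the call site keeps every other model on the library default while matching the 1e-12 epsilon used by the original BERT LayerNormalization layers.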