From c30b75a9d8ed464c77d4a911c9da6132aacb59f0 Mon Sep 17 00:00:00 2001 From: smokrow Date: Tue, 15 Jan 2019 01:35:46 +0100 Subject: [PATCH 01/26] edited buildfile for normalizations. Implemented GroupNorm,InstanceNorm and LayerNorm and first testcase --- tensorflow_addons/layers/BUILD | 15 +- .../layers/python/normalizations.py | 284 ++++++++++++++++++ .../layers/python/normalizations_test.py | 69 +++++ 3 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 tensorflow_addons/layers/python/normalizations.py create mode 100644 tensorflow_addons/layers/python/normalizations_test.py diff --git a/tensorflow_addons/layers/BUILD b/tensorflow_addons/layers/BUILD index 1d5c07d687..208c1e1312 100644 --- a/tensorflow_addons/layers/BUILD +++ b/tensorflow_addons/layers/BUILD @@ -8,6 +8,7 @@ py_library( "__init__.py", "python/__init__.py", "python/wrappers.py", + "python/normalizations.py" ]), srcs_version = "PY2AND3", ) @@ -22,4 +23,16 @@ py_test( ":layers_py", ], srcs_version = "PY2AND3", -) \ No newline at end of file +) + +py_test( + name = "layers_normalizations_py_test", + srcs = [ + "python/normalizations_test.py", + ], + main = "python/normalizations_test.py", + deps = [ + ":layers_py", + ], + srcs_version = "PY2AND3", +) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py new file mode 100644 index 0000000000..7bc85054c3 --- /dev/null +++ b/tensorflow_addons/layers/python/normalizations.py @@ -0,0 +1,284 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= + +# Orginal implementation from keras_contrib/layer/normalization + +from tensorflow.keras.layers import Layer, InputSpec +from tensorflow.keras import initializers, regularizers, constraints +from tensorflow.keras import backend as K +from tensorflow.keras.utils import get_custom_objects +from tensorflow.python.ops import nn + +class GroupNorm(Layer): + """Group normalization layer. + Group Normalization divides the channels into groups and computes + within each group + the mean and variance for normalization. + Group Normalization's computation is independent + of batch sizes, and its accuracy is stable in a wide range of batch sizes. + Relation to Layer Normalization: + If the number of groups is set to 1, then this operation becomes identical to + Layer Normalization. + Relation to Instance Normalization: + If the number of groups is set to the + input dimension (number of groups is equal + to number of channels), then this operation becomes + identical to Instance Normalization. + # Arguments + groups: Integer, the number of groups for Group Normalization. + Can be in the range [1, N] where N is the input dimension. + The input dimension must be divisible by the number of groups. + axis: Integer, the axis that should be normalized + (typically the features axis). 
+ For instance, after a `Conv2D` layer with + `data_format="channels_first"`, + set `axis=1` in `BatchNormalization`. + epsilon: Small float added to variance to avoid dividing by zero. + center: If True, add offset of `beta` to normalized tensor. + If False, `beta` is ignored. + scale: If True, multiply by `gamma`. + If False, `gamma` is not used. + When the next layer is linear (also e.g. `nn.relu`), + this can be disabled since the scaling + will be done by the next layer. + beta_initializer: Initializer for the beta weight. + gamma_initializer: Initializer for the gamma weight. + beta_regularizer: Optional regularizer for the beta weight. + gamma_regularizer: Optional regularizer for the gamma weight. + beta_constraint: Optional constraint for the beta weight. + gamma_constraint: Optional constraint for the gamma weight. + # Input shape + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + # Output shape + Same shape as input. + # References + - [Group Normalization](https://arxiv.org/abs/1803.08494) + """ + + def __init__(self, + layer, + groups=32, + axis=-1, + epsilon=1e-5, + center=True, + scale=True, + beta_initializer='zeros', + gamma_initializer='ones', + beta_regularizer=None, + gamma_regularizer=None, + beta_constraint=None, + gamma_constraint=None, + **kwargs): + super(GroupNorm, self).__init__(layer,**kwargs) + self.supports_masking = True + self.groups = groups + self.axis = axis + self.epsilon = epsilon + self.center = center + self.scale = scale + self.beta_initializer = initializers.get(beta_initializer) + self.gamma_initializer = initializers.get(gamma_initializer) + self.beta_regularizer = regularizers.get(beta_regularizer) + self.gamma_regularizer = regularizers.get(gamma_regularizer) + self.beta_constraint = constraints.get(beta_constraint) + self.gamma_constraint = constraints.get(gamma_constraint) + + def build(self, input_shape): + dim = input_shape[self.axis] + + if dim is None: + raise ValueError('Axis ' + str(self.axis) + ' of ' + 'input tensor should have a defined dimension ' + 'but the layer received an input with shape ' + + str(input_shape) + '.') + if self.groups==-1: + self.groups=dim + + if dim < self.groups: + raise ValueError('Number of groups (' + str(self.groups) + ') cannot be ' + 'more than the number of channels (' + + str(dim) + ').') + + if dim % self.groups != 0: + raise ValueError('Number of groups (' + str(self.groups) + ') must be a ' + 'multiple of the number of channels (' + + str(dim) + ').') + + self.input_spec = InputSpec(ndim=len(input_shape), + axes={self.axis: dim}) + shape = (dim,) + + if self.scale: + self.gamma = self.add_weight(shape=shape, + name='gamma', + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint) + else: + self.gamma = None + if self.center: + self.beta = self.add_weight(shape=shape, + name='beta', + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint) + else: + self.beta = None + self.built = True + + def call(self, inputs, **kwargs): + input_shape = K.int_shape(inputs) + tensor_input_shape = K.shape(inputs) + + # Prepare broadcasting shape. 
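The grouping-and-moments logic in this method is easier to follow as a standalone sketch. Below is a minimal NumPy reference of the standard GroupNorm computation (illustrative only, not part of the patch; it assumes a channels-last 4D input and is not necessarily bit-identical to the reshaping this layer performs):

```python
import numpy as np

def group_norm_reference(x, groups, gamma, beta, epsilon=1e-5):
    """Standard group normalization for a (N, H, W, C) array, channels-last."""
    n, h, w, c = x.shape
    assert c % groups == 0, "channels must be divisible by groups"
    # Split the channel axis into (groups, C // groups).
    xg = x.reshape(n, h, w, groups, c // groups)
    # Moments are taken per sample and per group, over (H, W, C // groups).
    mean = xg.mean(axis=(1, 2, 4), keepdims=True)
    var = xg.var(axis=(1, 2, 4), keepdims=True)
    xg = (xg - mean) / np.sqrt(var + epsilon)
    # Merge the groups back and apply the per-channel affine parameters.
    return xg.reshape(n, h, w, c) * gamma + beta

x = np.random.randn(2, 4, 4, 8).astype("float32")
out = group_norm_reference(x, groups=4,
                           gamma=np.ones(8, "float32"),
                           beta=np.zeros(8, "float32"))
print(out.shape)  # (2, 4, 4, 8); groups=1 -> LayerNorm case, groups=8 -> InstanceNorm case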
+ reduction_axes = list(range(len(input_shape))) + del reduction_axes[self.axis] + broadcast_shape = [1] * len(input_shape) + broadcast_shape[self.axis] = input_shape[self.axis] // self.groups + broadcast_shape.insert(1, self.groups) + + reshape_group_shape = K.shape(inputs) + group_axes = [reshape_group_shape[i] for i in range(len(input_shape))] + group_axes[self.axis] = input_shape[self.axis] // self.groups + group_axes.insert(1, self.groups) + + # reshape inputs to new group shape + group_shape = [group_axes[0], self.groups] + group_axes[2:] + group_shape = K.stack(group_shape) + inputs = K.reshape(inputs, group_shape) + + group_reduction_axes = list(range(len(group_axes))) + mean, variance = nn.moments(inputs, group_reduction_axes[2:], + keep_dims=True) + inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon)) + + # prepare broadcast shape + inputs = K.reshape(inputs, group_shape) + + outputs = inputs + + # In this case we must explicitly broadcast all parameters. + if self.scale: + broadcast_gamma = K.reshape(self.gamma, broadcast_shape) + outputs = outputs * broadcast_gamma + + if self.center: + broadcast_beta = K.reshape(self.beta, broadcast_shape) + outputs = outputs + broadcast_beta + + # finally we reshape the output back to the input shape + outputs = K.reshape(outputs, tensor_input_shape) + + return outputs + + def get_config(self): + config = { + 'groups': self.groups, + 'axis': self.axis, + 'epsilon': self.epsilon, + 'center': self.center, + 'scale': self.scale, + 'beta_initializer': initializers.serialize(self.beta_initializer), + 'gamma_initializer': initializers.serialize(self.gamma_initializer), + 'beta_regularizer': regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), + 'beta_constraint': constraints.serialize(self.beta_constraint), + 'gamma_constraint': constraints.serialize(self.gamma_constraint) + } + base_config = super(GroupNormalization, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + return input_shape + +class LayerNorm(GroupNorm): + """Layer normalization layer. + Layer Normalization is an specific case of ```GroupNormalization```since it + normalizes all features of a layer. The Groupsize is 1. + Layer Normalization's computation is independent + of batch sizes, and its accuracy is stable in a wide range of batch sizes. + # Arguments + axis: Integer, the axis that should be normalized + (typically the features axis). + For instance, after a `Conv2D` layer with + `data_format="channels_first"`, + set `axis=1` in `BatchNormalization`. + epsilon: Small float added to variance to avoid dividing by zero. + center: If True, add offset of `beta` to normalized tensor. + If False, `beta` is ignored. + scale: If True, multiply by `gamma`. + If False, `gamma` is not used. + When the next layer is linear (also e.g. `nn.relu`), + this can be disabled since the scaling + will be done by the next layer. + beta_initializer: Initializer for the beta weight. + gamma_initializer: Initializer for the gamma weight. + beta_regularizer: Optional regularizer for the beta weight. + gamma_regularizer: Optional regularizer for the gamma weight. + beta_constraint: Optional constraint for the beta weight. + gamma_constraint: Optional constraint for the gamma weight. + # Input shape + Arbitrary. 
Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + # Output shape + Same shape as input. + # References + - [Layer Normalization](https://arxiv.org/abs/1607.06450) + """ + def __init__(self,**kwargs): + kwargs["groups"]=1 + super(LayerNorm,self).__init__(**kwargs) + +class InstanceNorm(GroupNorm): + """Instance normalization layer. + Instance Normalization is an specific case of ```GroupNormalization```since it + normalizes all features of one channel. The Groupsize is equal to the channel size. + Instance Normalization's computation is independent + of batch sizes, and its accuracy is stable in a wide range of batch sizes. + # Arguments + axis: Integer, the axis that should be normalized + (typically the features axis). + For instance, after a `Conv2D` layer with + `data_format="channels_first"`, + set `axis=1` in `BatchNormalization`. + epsilon: Small float added to variance to avoid dividing by zero. + center: If True, add offset of `beta` to normalized tensor. + If False, `beta` is ignored. + scale: If True, multiply by `gamma`. + If False, `gamma` is not used. + When the next layer is linear (also e.g. `nn.relu`), + this can be disabled since the scaling + will be done by the next layer. + beta_initializer: Initializer for the beta weight. + gamma_initializer: Initializer for the gamma weight. + beta_regularizer: Optional regularizer for the beta weight. + gamma_regularizer: Optional regularizer for the gamma weight. + beta_constraint: Optional constraint for the beta weight. + gamma_constraint: Optional constraint for the gamma weight. + # Input shape + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + # Output shape + Same shape as input. + # References + - [Layer Normalization](https://arxiv.org/abs/1607.06450) + """ + def __init__(self,**kwargs): + kwargs["groups"]=-1 + super(InstanceNorm,self).__init__(**kwargs) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py new file mode 100644 index 0000000000..d7b8d913d1 --- /dev/null +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -0,0 +1,69 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================= + +from tensorflow_addons.layers.python.normalizations import GroupNorm,LayerNorm,InstanceNorm +import numpy as np +import tensorflow as tf +from tensorflow.python import keras as keras +from tensorflow.python.training.rmsprop import RMSPropOptimizer + +from tensorflow.python.platform import test +from tensorflow.python.framework import test_util as tf_test_util + + +class NormTest(test.TestCase): + + @tf_test_util.run_all_in_graph_and_eager_modes + def test_groupnorm_flat(self): + # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm + groups=[-1,16,1] + for i in groups: + + model = keras.models.Sequential() + model.add(GroupNorm( + keras.layers.Dense(32), input_shape=(32,),groups=i)) + + model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') + model.fit( + np.random.random((10,32)), + np.random.random((10,32)), + epochs=1, + batch_size=10) + self.assertTrue(hasattr(model.layers[0], 'gamma')) + self.assertTrue(hasattr(model.layers[0], 'beta')) + + @tf_test_util.run_all_in_graph_and_eager_modes + def test_groupnorm_conv(self): + # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm + groups=[1,5,-1] + for i in groups: + + model = keras.models.Sequential() + model.add(GroupNorm( + keras.layers.Conv2D(5, (3, 10), padding='same'), + input_shape=(3,10),groups=i)) + + model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') + model.fit( + np.random.random((10, 3, 10)), + np.random.random((10, 3, 10)), + epochs=1, + batch_size=10) + self.assertTrue(hasattr(model.layers[0], 'gamma')) + self.assertTrue(hasattr(model.layers[0], 'beta')) + + +if __name__ == "__main__": + test.main() From 0e7674badbafa16e5653065f4e2597ef9d440d0f Mon Sep 17 00:00:00 2001 From: smokrow Date: Thu, 17 Jan 2019 16:51:46 +0100 Subject: [PATCH 02/26] Resolved Comments --- tensorflow_addons/layers/python/normalizations.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 7bc85054c3..14c37c4ff7 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -18,10 +18,9 @@ from tensorflow.keras.layers import Layer, InputSpec from tensorflow.keras import initializers, regularizers, constraints from tensorflow.keras import backend as K -from tensorflow.keras.utils import get_custom_objects from tensorflow.python.ops import nn -class GroupNorm(Layer): +class GroupNormalization(Layer): """Group normalization layer. Group Normalization divides the channels into groups and computes within each group @@ -83,7 +82,7 @@ def __init__(self, beta_constraint=None, gamma_constraint=None, **kwargs): - super(GroupNorm, self).__init__(layer,**kwargs) + super(GroupNormalization, self).__init__(layer,**kwargs) self.supports_masking = True self.groups = groups self.axis = axis @@ -140,7 +139,7 @@ def build(self, input_shape): self.beta = None self.built = True - def call(self, inputs, **kwargs): + def call(self, inputs): input_shape = K.int_shape(inputs) tensor_input_shape = K.shape(inputs) @@ -205,7 +204,7 @@ def get_config(self): def compute_output_shape(self, input_shape): return input_shape -class LayerNorm(GroupNorm): +class LayerNormalization(GroupNormalization): """Layer normalization layer. Layer Normalization is an specific case of ```GroupNormalization```since it normalizes all features of a layer. The Groupsize is 1. 
@@ -242,9 +241,9 @@ class LayerNorm(GroupNorm): """ def __init__(self,**kwargs): kwargs["groups"]=1 - super(LayerNorm,self).__init__(**kwargs) + super(LayerNormalization,self).__init__(**kwargs) -class InstanceNorm(GroupNorm): +class InstanceNormalization(GroupNormalization): """Instance normalization layer. Instance Normalization is an specific case of ```GroupNormalization```since it normalizes all features of one channel. The Groupsize is equal to the channel size. @@ -281,4 +280,4 @@ class InstanceNorm(GroupNorm): """ def __init__(self,**kwargs): kwargs["groups"]=-1 - super(InstanceNorm,self).__init__(**kwargs) + super(InstanceNormalization,self).__init__(**kwargs) From 65a5495c41b1467b5cc6cfc5347073ec080362fe Mon Sep 17 00:00:00 2001 From: smokrow Date: Sat, 9 Feb 2019 12:16:35 +0100 Subject: [PATCH 03/26] found bug in normalizations init --- .../layers/python/normalizations.py | 3 +- .../layers/python/normalizations_test.py | 225 ++++++++++++++++-- 2 files changed, 210 insertions(+), 18 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 14c37c4ff7..52251557e8 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -69,7 +69,6 @@ class GroupNormalization(Layer): """ def __init__(self, - layer, groups=32, axis=-1, epsilon=1e-5, @@ -82,7 +81,7 @@ def __init__(self, beta_constraint=None, gamma_constraint=None, **kwargs): - super(GroupNormalization, self).__init__(layer,**kwargs) + super(GroupNormalization, self).__init__(**kwargs) self.supports_masking = True self.groups = groups self.axis = axis diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index d7b8d913d1..57e9f6cfec 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -13,7 +13,7 @@ # limitations under the License. 
# ============================================================================= -from tensorflow_addons.layers.python.normalizations import GroupNorm,LayerNorm,InstanceNorm +from tensorflow_addons.layers.python.normalizations import GroupNormalization,LayerNormalization,InstanceNormalization import numpy as np import tensorflow as tf from tensorflow.python import keras as keras @@ -23,17 +23,32 @@ from tensorflow.python.framework import test_util as tf_test_util -class NormTest(test.TestCase): +class normalization_test(test.TestCase): + + @tf_test_util.run_all_in_graph_and_eager_modes + def test_weights(self): + layer = GroupNormalization(groups=1,scale=False, center=False) + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights), 0) + self.assertEqual(len(layer.weights), 0) + + layer = keras.layers.LayerNormalization() + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights), 2) + self.assertEqual(len(layer.weights), 2) + + + @tf_test_util.run_all_in_graph_and_eager_modes def test_groupnorm_flat(self): # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm groups=[-1,16,1] for i in groups: - - model = keras.models.Sequential() - model.add(GroupNorm( - keras.layers.Dense(32), input_shape=(32,),groups=i)) + model=keras.models.Sequential() + model.add(GroupNormalization( + input_shape=(32,),groups=i)) + model.add(keras.layers.Dense(32)) model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') model.fit( @@ -44,26 +59,204 @@ def test_groupnorm_flat(self): self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) - @tf_test_util.run_all_in_graph_and_eager_modes def test_groupnorm_conv(self): # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm - groups=[1,5,-1] + #groups=[1,5,-1] + groups=[1] for i in groups: model = keras.models.Sequential() - model.add(GroupNorm( - keras.layers.Conv2D(5, (3, 10), padding='same'), - input_shape=(3,10),groups=i)) + model.add(GroupNormalization( + input_shape=(20,20,3,),groups=i)) + + model.add(keras.layers.Conv2D(5, (1, 1), padding='same')) model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') - model.fit( - np.random.random((10, 3, 10)), - np.random.random((10, 3, 10)), - epochs=1, - batch_size=10) + model.fit(np.random.random((10,20, 20, 3))) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) + """def testUnknownShape(self): + inputs = array_ops.placeholder(dtypes.float32) + with self.assertRaisesRegexp(ValueError, 'undefined rank'): + GroupNormalization(inputs) + LayerNormaliztion(inputs) + InstanceNormalization(inputs)""" +""" +class LayerNormalizationTest(keras_parameterized.TestCase): + + + @tf_test_util.run_in_graph_and_eager_modes + def test_layernorm_regularization(self): + layer = keras.layers.LayerNormalization( + gamma_regularizer='l1', beta_regularizer='l1') + layer.build((None, 3, 4)) + self.assertEqual(len(layer.losses), 2) + max_norm = keras.constraints.max_norm + layer = keras.layers.LayerNormalization( + gamma_constraint=max_norm, beta_constraint=max_norm) + layer.build((None, 3, 4)) + self.assertEqual(layer.gamma.constraint, max_norm) + self.assertEqual(layer.beta.constraint, max_norm) + + @keras_parameterized.run_all_keras_modes + def test_layernorm_convnet(self): + if test.is_gpu_available(cuda_only=True): + with self.session(use_gpu=True): + model = keras.models.Sequential() + norm = keras.layers.LayerNormalization(input_shape=(3, 4, 4)) + model.add(norm) + 
model.compile(loss='mse', + optimizer=gradient_descent.GradientDescentOptimizer(0.01), + run_eagerly=testing_utils.should_run_eagerly()) + + # centered on 5.0, variance 10.0 + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) + model.fit(x, x, epochs=4, verbose=0) + out = model.predict(x) + out -= np.reshape(keras.backend.eval(norm.beta), (1, 3, 1, 1)) + out /= np.reshape(keras.backend.eval(norm.gamma), (1, 3, 1, 1)) + + np.testing.assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) + np.testing.assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) + + @keras_parameterized.run_all_keras_modes + def test_layernorm_convnet_channel_last(self): + model = keras.models.Sequential() + norm = keras.layers.LayerNormalization(input_shape=(4, 4, 3)) + model.add(norm) + model.compile(loss='mse', + optimizer=gradient_descent.GradientDescentOptimizer(0.01), + run_eagerly=testing_utils.should_run_eagerly()) + + # centered on 5.0, variance 10.0 + x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3)) + model.fit(x, x, epochs=4, verbose=0) + out = model.predict(x) + out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3)) + out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3)) + + np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1) + np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1) + + @keras_parameterized.run_all_keras_modes + def test_layernorm_correctness(self): + _run_layernorm_correctness_test( + normalization.LayerNormalization, dtype='float32') + + @keras_parameterized.run_all_keras_modes + def test_layernorm_mixed_precision(self): + _run_layernorm_correctness_test( + normalization.LayerNormalization, dtype='float16') + + def doOutputTest(self, + input_shape, + tol=1e-5, + norm_axis=None, + params_axis=-1, + dtype=None): + ndim = len(input_shape) + if norm_axis is None: + moments_axis = range(1, ndim) + elif isinstance(norm_axis, int): + if norm_axis < 0: + moments_axis = [norm_axis + ndim] + else: + moments_axis = [norm_axis] + else: + moments_axis = [] + for dim in norm_axis: + if dim < 0: + dim = dim + ndim + moments_axis.append(dim) + + moments_axis = tuple(moments_axis) + expected_shape = [] + for i in range(ndim): + if i not in moments_axis: + expected_shape.append(input_shape[i]) + + expected_mean = np.zeros(expected_shape) + expected_var = np.ones(expected_shape) + for mu in [0.0, 1e2]: + for sigma in [1.0, 0.1]: + inputs = np.random.randn(*input_shape) * sigma + mu + inputs_t = constant_op.constant(inputs, shape=input_shape) + layer = normalization.LayerNormalization( + norm_axis=norm_axis, params_axis=params_axis, dtype=dtype) + outputs = layer(inputs_t) + beta = layer.beta + gamma = layer.gamma + for weight in layer.weights: + self.evaluate(weight.initializer) + outputs = self.evaluate(outputs) + beta = self.evaluate(beta) + gamma = self.evaluate(gamma) + + # The mean and variance of the output should be close to 0 and 1 + # respectively. 
+ + # Make sure that there are no NaNs + self.assertFalse(np.isnan(outputs).any()) + mean = np.mean(outputs, axis=moments_axis) + var = np.var(outputs, axis=moments_axis) + # Layer-norm implemented in numpy + eps = 1e-12 + expected_out = ( + (gamma * (inputs - np.mean( + inputs, axis=moments_axis, keepdims=True)) / + np.sqrt(eps + np.var( + inputs, axis=moments_axis, keepdims=True))) + beta) + self.assertAllClose(expected_mean, mean, atol=tol, rtol=tol) + self.assertAllClose(expected_var, var, atol=tol) + # The full computation gets a bigger tolerance + self.assertAllClose(expected_out, outputs, atol=5 * tol) + + @tf_test_util.run_in_graph_and_eager_modes + def testOutput2DInput(self): + self.doOutputTest((10, 300)) + self.doOutputTest((10, 300), norm_axis=[0]) + self.doOutputTest((10, 300), params_axis=[0, 1]) + + @tf_test_util.run_in_graph_and_eager_modes + def testOutput2DInputDegenerateNormAxis(self): + with self.assertRaisesRegexp(ValueError, r'Invalid axis: 2'): + self.doOutputTest((10, 300), norm_axis=2) + + @tf_test_util.run_in_graph_and_eager_modes + def testOutput4DInput(self): + self.doOutputTest((100, 10, 10, 3)) + + @tf_test_util.run_in_graph_and_eager_modes + def testOutput4DInputNormOnInnermostAxis(self): + # Equivalent tests + shape = (100, 10, 10, 3) + self.doOutputTest( + shape, norm_axis=list(range(3, len(shape))), tol=1e-4, dtype='float64') + self.doOutputTest(shape, norm_axis=-1, tol=1e-4, dtype='float64') + + @tf_test_util.run_in_graph_and_eager_modes + def testOutputSmallInput(self): + self.doOutputTest((10, 10, 10, 30)) + + @tf_test_util.run_in_graph_and_eager_modes + def testOutputSmallInputNormOnInnermostAxis(self): + self.doOutputTest((10, 10, 10, 30), norm_axis=3) + + @tf_test_util.run_in_graph_and_eager_modes + def testOutputSmallInputNormOnMixedAxes(self): + self.doOutputTest((10, 10, 10, 30), norm_axis=[0, 3]) + self.doOutputTest((10, 10, 10, 30), params_axis=[-2, -1]) + self.doOutputTest((10, 10, 10, 30), norm_axis=[0, 3], + params_axis=[-3, -2, -1]) + + @tf_test_util.run_in_graph_and_eager_modes + def testOutputBigInput(self): + self.doOutputTest((1, 100, 100, 1)) + self.doOutputTest((1, 100, 100, 1), norm_axis=[1, 2]) + self.doOutputTest((1, 100, 100, 1), norm_axis=[1, 2], + params_axis=[-2, -1]) +""" if __name__ == "__main__": test.main() From 892110cdbe7f06638c7dd14bce5b8edd48e35be7 Mon Sep 17 00:00:00 2001 From: Moritz Date: Sun, 10 Feb 2019 20:54:26 +0100 Subject: [PATCH 04/26] minor changes --- tensorflow_addons/layers/python/normalizations_test.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index 57e9f6cfec..ed5f831955 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -42,7 +42,7 @@ def test_weights(self): @tf_test_util.run_all_in_graph_and_eager_modes def test_groupnorm_flat(self): - # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm + # Testing for 1 == LayerNorm, 16 == GroupNorm, -1 == InstanceNorm groups=[-1,16,1] for i in groups: model=keras.models.Sequential() @@ -59,6 +59,7 @@ def test_groupnorm_flat(self): self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) + @tf_test_util.run_all_in_graph_and_eager_modes def test_groupnorm_conv(self): # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm #groups=[1,5,-1] @@ -74,14 +75,7 @@ def 
test_groupnorm_conv(self): model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') model.fit(np.random.random((10,20, 20, 3))) self.assertTrue(hasattr(model.layers[0], 'gamma')) - self.assertTrue(hasattr(model.layers[0], 'beta')) - """def testUnknownShape(self): - inputs = array_ops.placeholder(dtypes.float32) - with self.assertRaisesRegexp(ValueError, 'undefined rank'): - GroupNormalization(inputs) - LayerNormaliztion(inputs) - InstanceNormalization(inputs)""" """ class LayerNormalizationTest(keras_parameterized.TestCase): From 57c60a7c9c3c3471a21827be1749c9b91ac8db90 Mon Sep 17 00:00:00 2001 From: Moritz Date: Sun, 10 Feb 2019 20:56:17 +0100 Subject: [PATCH 05/26] Merge remote-tracking branch 'upstream/master' into dev/tests --- BUILD | 2 +- tensorflow_addons/layers/BUILD | 31 +- tensorflow_addons/layers/__init__.py | 4 +- tensorflow_addons/layers/python/maxout.py | 98 +++++ .../layers/python/maxout_test.py | 71 ++++ tensorflow_addons/layers/python/poincare.py | 77 ++++ .../layers/python/poincare_test.py | 87 +++++ tensorflow_addons/layers/python/wrappers.py | 20 +- .../layers/python/wrappers_test.py | 12 +- tensorflow_addons/optimizers/BUILD | 24 ++ .../optimizers/python/lazy_adam_optimizer.py | 81 ++++ .../python/lazy_adam_optimizer_test.py | 348 ++++++++++++++++++ .../text/cc/kernels/skip_gram_kernels.cc | 10 +- .../text/python/skip_gram_ops_test.py | 16 +- 14 files changed, 847 insertions(+), 34 deletions(-) create mode 100644 tensorflow_addons/layers/python/maxout.py create mode 100644 tensorflow_addons/layers/python/maxout_test.py create mode 100644 tensorflow_addons/layers/python/poincare.py create mode 100644 tensorflow_addons/layers/python/poincare_test.py create mode 100644 tensorflow_addons/optimizers/python/lazy_adam_optimizer.py create mode 100644 tensorflow_addons/optimizers/python/lazy_adam_optimizer_test.py diff --git a/BUILD b/BUILD index 09d2a8a26a..01d63a8ac8 100644 --- a/BUILD +++ b/BUILD @@ -6,7 +6,7 @@ sh_binary( "MANIFEST.in", "setup.py", "tensorflow_addons/__init__.py", - "//tensorflow_addons/layers:layers_py", + "//tensorflow_addons/layers:layers_py", "//tensorflow_addons/text:text_py", ], ) diff --git a/tensorflow_addons/layers/BUILD b/tensorflow_addons/layers/BUILD index 208c1e1312..1b1ae10b9d 100644 --- a/tensorflow_addons/layers/BUILD +++ b/tensorflow_addons/layers/BUILD @@ -4,9 +4,11 @@ package(default_visibility = ["//visibility:public"]) py_library( name = "layers_py", - srcs = ([ + srcs = [ "__init__.py", "python/__init__.py", + "python/maxout.py", + "python/poincare.py", "python/wrappers.py", "python/normalizations.py" ]), @@ -19,9 +21,32 @@ py_test( "python/wrappers_test.py", ], main = "python/wrappers_test.py", + srcs_version = "PY2AND3", deps = [ - ":layers_py", - ], + ":layers_py", + ], +) + +py_test( + name = "maxout_py_test", + size = "small", + srcs = [ + "python/maxout_test.py", + ], + main = "python/maxout_test.py", + srcs_version = "PY2AND3", + deps = [ + ":layers_py", + ], +) + +py_test( + name = "poincare_py_test", + size = "small", + srcs = [ + "python/poincare_test.py", + ], + main = "python/poincare_test.py", srcs_version = "PY2AND3", ) diff --git a/tensorflow_addons/layers/__init__.py b/tensorflow_addons/layers/__init__.py index de8f5c2d2c..09a236c8c9 100644 --- a/tensorflow_addons/layers/__init__.py +++ b/tensorflow_addons/layers/__init__.py @@ -19,5 +19,5 @@ from __future__ import division from __future__ import print_function -# Weight Normalization Wrapper -from tensorflow_addons.layers.python.wrappers import WeightNorm +from 
tensorflow_addons.layers.python.maxout import Maxout +from tensorflow_addons.layers.python.wrappers import WeightNormalization diff --git a/tensorflow_addons/layers/python/maxout.py b/tensorflow_addons/layers/python/maxout.py new file mode 100644 index 0000000000..0beb2ae24d --- /dev/null +++ b/tensorflow_addons/layers/python/maxout.py @@ -0,0 +1,98 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementing Maxout layer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras.utils import generic_utils +from tensorflow.python.keras.engine.base_layer import Layer +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +class Maxout(Layer): + """Applies Maxout to the input. + + "Maxout Networks" Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron + Courville, Yoshua Bengio. https://arxiv.org/abs/1302.4389 + + Usually the operation is performed in the filter/channel dimension. This can + also be used after Dense layers to reduce number of features. + + Arguments: + num_units: Specifies how many features will remain after maxout + in the `axis` dimension (usually channel). + This must be a factor of number of features. + axis: The dimension where max pooling will be performed. Default is the + last dimension. + + Input shape: + nD tensor with shape: `(batch_size, ..., axis_dim, ...)`. + + Output shape: + nD tensor with shape: `(batch_size, ..., num_units, ...)`. 
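As a concrete illustration of the shapes above (a toy sketch, not part of the patch), the op splits the `axis` dimension into `num_units` retained features and keeps one maximum per retained feature, mirroring the reshape performed in `call` below:

```python
import numpy as np

x = np.array([[1., 5., 2., 7., 0., 3.]])     # shape (1, 6)
num_units = 2
k = x.shape[-1] // num_units                 # 3 candidate features per kept unit
candidates = x.reshape(1, k, num_units)      # (1, 3, 2), same expand_shape as the layer builds
print(candidates.max(axis=1))                # [[2. 7.]] -> shape (1, num_units)
```

So a 6-feature input is reduced to 2 features, e.g. after a `Dense(6)` layer.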
+ """ + + def __init__(self, num_units, axis=-1, **kwargs): + super(Maxout, self).__init__(**kwargs) + self.num_units = num_units + self.axis = axis + + def call(self, inputs): + inputs = ops.convert_to_tensor(inputs) + shape = inputs.get_shape().as_list() + # Dealing with batches with arbitrary sizes + for i in range(len(shape)): + if shape[i] is None: + shape[i] = array_ops.shape(inputs)[i] + + num_channels = shape[self.axis] + if (not isinstance(num_channels, ops.Tensor) + and num_channels % self.num_units): + raise ValueError('number of features({}) is not ' + 'a multiple of num_units({})'.format( + num_channels, self.num_units)) + + if self.axis < 0: + axis = self.axis + len(shape) + else: + axis = self.axis + assert axis >= 0, 'Find invalid axis: {}'.format(self.axis) + + expand_shape = shape[:] + expand_shape[axis] = self.num_units + k = num_channels // self.num_units + expand_shape.insert(axis, k) + + outputs = math_ops.reduce_max( + array_ops.reshape(inputs, expand_shape), axis, keepdims=False) + return outputs + + def compute_output_shape(self, input_shape): + input_shape = tensor_shape.TensorShape(input_shape).as_list() + input_shape[self.axis] = self.num_units + return tensor_shape.TensorShape(input_shape) + + def get_config(self): + config = {'num_units': self.num_units, 'axis': self.axis} + base_config = super(Maxout, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +generic_utils._GLOBAL_CUSTOM_OBJECTS['Maxout'] = Maxout diff --git a/tensorflow_addons/layers/python/maxout_test.py b/tensorflow_addons/layers/python/maxout_test.py new file mode 100644 index 0000000000..22e381f8c2 --- /dev/null +++ b/tensorflow_addons/layers/python/maxout_test.py @@ -0,0 +1,71 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Maxout layer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.keras import testing_utils +from tensorflow.python.platform import test +from tensorflow_addons.layers.python.maxout import Maxout + + +class MaxOutTest(test.TestCase): + def test_simple(self): + testing_utils.layer_test( + Maxout, kwargs={'num_units': 3}, input_shape=(5, 4, 2, 18)) + + def test_nchw(self): + testing_utils.layer_test( + Maxout, + kwargs={ + 'num_units': 4, + 'axis': 1 + }, + input_shape=(2, 20, 3, 6)) + + testing_utils.layer_test( + Maxout, + kwargs={ + 'num_units': 4, + 'axis': -3 + }, + input_shape=(2, 20, 3, 6)) + + def test_unknown(self): + inputs = np.random.random((5, 4, 2, 18)).astype('float32') + testing_utils.layer_test( + Maxout, + kwargs={'num_units': 3}, + input_shape=(5, 4, 2, None), + input_data=inputs) + + testing_utils.layer_test( + Maxout, + kwargs={'num_units': 3}, + input_shape=(None, None, None, None), + input_data=inputs) + + def test_invalid_shape(self): + with self.assertRaisesRegexp(ValueError, r'number of features'): + testing_utils.layer_test( + Maxout, kwargs={'num_units': 3}, input_shape=(5, 4, 2, 7)) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow_addons/layers/python/poincare.py b/tensorflow_addons/layers/python/poincare.py new file mode 100644 index 0000000000..037e7b0c82 --- /dev/null +++ b/tensorflow_addons/layers/python/poincare.py @@ -0,0 +1,77 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Implementing PoincareNormalize layer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.keras.utils import generic_utils +from tensorflow.python.keras.engine.base_layer import Layer +from tensorflow.python.ops import math_ops + + +class PoincareNormalize(Layer): + """Project into the Poincare ball with norm <= 1.0 - epsilon. + + https://en.wikipedia.org/wiki/Poincare_ball_model + + Used in + Poincare Embeddings for Learning Hierarchical Representations + Maximilian Nickel, Douwe Kiela + https://arxiv.org/pdf/1705.08039.pdf + + For a 1-D tensor with `axis = 0`, computes + + (x * (1 - epsilon)) / ||x|| if ||x|| > 1 - epsilon + output = + x otherwise + + For `x` with more dimensions, independently normalizes each 1-D slice along + dimension `axis`. + + Arguments: + axis: Axis along which to normalize. A scalar or a vector of + integers. + epsilon: A small deviation from the edge of the unit sphere for numerical + stability. 
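A small NumPy sketch of the projection (illustrative only; uses `axis=1`, the layer's default): rows with norm above `1 - epsilon` are rescaled onto the ball boundary, while shorter rows pass through unchanged.

```python
import numpy as np

epsilon = 1e-5
x = np.array([[3.0, 4.0],    # norm 5.0 -> projected onto the boundary
              [0.3, 0.4]])   # norm 0.5 -> already inside, left unchanged
norm = np.linalg.norm(x, axis=1, keepdims=True)
out = np.where(norm > 1.0 - epsilon, (1.0 - epsilon) * x / norm, x)
print(np.linalg.norm(out, axis=1))  # approximately [0.99999, 0.5]
```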
+ """ + + def __init__(self, axis=1, epsilon=1e-5, **kwargs): + super(PoincareNormalize, self).__init__(**kwargs) + self.axis = axis + self.epsilon = epsilon + + def call(self, inputs): + x = ops.convert_to_tensor(inputs) + square_sum = math_ops.reduce_sum( + math_ops.square(x), self.axis, keepdims=True) + x_inv_norm = math_ops.rsqrt(square_sum) + x_inv_norm = math_ops.minimum((1. - self.epsilon) * x_inv_norm, 1.) + outputs = math_ops.multiply(x, x_inv_norm) + return outputs + + def compute_output_shape(self, input_shape): + return input_shape + + def get_config(self): + config = {'axis': self.axis, 'epsilon': self.epsilon} + base_config = super(PoincareNormalize, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + +generic_utils._GLOBAL_CUSTOM_OBJECTS['PoincareNormalize'] = PoincareNormalize + diff --git a/tensorflow_addons/layers/python/poincare_test.py b/tensorflow_addons/layers/python/poincare_test.py new file mode 100644 index 0000000000..81be19c249 --- /dev/null +++ b/tensorflow_addons/layers/python/poincare_test.py @@ -0,0 +1,87 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for PoincareNormalize layer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.keras import testing_utils +from tensorflow.python.platform import test +from tensorflow_addons.layers.python.poincare import PoincareNormalize + + +class PoincareNormalizeTest(test.TestCase): + def _PoincareNormalize(self, x, dim, epsilon=1e-5): + if isinstance(dim, list): + norm = np.linalg.norm(x, axis=tuple(dim)) + for d in dim: + norm = np.expand_dims(norm, d) + norm_x = ((1. - epsilon) * x) / norm + else: + norm = np.expand_dims( + np.apply_along_axis(np.linalg.norm, dim, x), dim) + norm_x = ((1. - epsilon) * x) / norm + return np.where(norm > 1.0 - epsilon, norm_x, x) + + def testPoincareNormalize(self): + x_shape = [20, 7, 3] + epsilon = 1e-5 + tol = 1e-6 + np.random.seed(1) + inputs = np.random.random_sample(x_shape).astype(np.float32) + + for dim in range(len(x_shape)): + outputs_expected = self._PoincareNormalize(inputs, dim, epsilon) + + outputs = testing_utils.layer_test( + PoincareNormalize, + kwargs={ + 'axis': dim, + 'epsilon': epsilon + }, + input_data=inputs, + expected_output=outputs_expected) + for y in outputs_expected, outputs: + norm = np.linalg.norm(y, axis=dim) + self.assertLessEqual(norm.max(), 1. 
- epsilon + tol) + + def testPoincareNormalizeDimArray(self): + x_shape = [20, 7, 3] + epsilon = 1e-5 + tol = 1e-6 + np.random.seed(1) + inputs = np.random.random_sample(x_shape).astype(np.float32) + dim = [1, 2] + + outputs_expected = self._PoincareNormalize(inputs, dim, epsilon) + + outputs = testing_utils.layer_test( + PoincareNormalize, + kwargs={ + 'axis': dim, + 'epsilon': epsilon + }, + input_data=inputs, + expected_output=outputs_expected) + for y in outputs_expected, outputs: + norm = np.linalg.norm(y, axis=tuple(dim)) + self.assertLessEqual(norm.max(), 1. - epsilon + tol) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow_addons/layers/python/wrappers.py b/tensorflow_addons/layers/python/wrappers.py index e9e5df37c9..95bfd7eb68 100644 --- a/tensorflow_addons/layers/python/wrappers.py +++ b/tensorflow_addons/layers/python/wrappers.py @@ -26,22 +26,22 @@ from tensorflow.python.ops import variables as tf_variables -class WeightNorm(Wrapper): +class WeightNormalization(Wrapper): """ This wrapper reparameterizes a layer by decoupling the weight's magnitude and direction. This speeds up convergence by improving the conditioning of the optimization problem. Weight Normalization: A Simple Reparameterization to Accelerate Training of Deep Neural Networks: https://arxiv.org/abs/1602.07868 Tim Salimans, Diederik P. Kingma (2016) - WeightNorm wrapper works for keras and tf layers. + WeightNormalization wrapper works for keras and tf layers. ```python - net = WeightNorm(tf.keras.layers.Conv2D(2, 2, activation='relu'), + net = WeightNormalization(tf.keras.layers.Conv2D(2, 2, activation='relu'), input_shape=(32, 32, 3), data_init=True)(x) - net = WeightNorm(tf.keras.layers.Conv2D(16, 5, activation='relu'), + net = WeightNormalization(tf.keras.layers.Conv2D(16, 5, activation='relu'), data_init=True)(net) - net = WeightNorm(tf.keras.layers.Dense(120, activation='relu'), + net = WeightNormalization(tf.keras.layers.Dense(120, activation='relu'), data_init=True)(net) - net = WeightNorm(tf.keras.layers.Dense(n_classes), + net = WeightNormalization(tf.keras.layers.Dense(n_classes), data_init=True)(net) ``` Arguments: @@ -55,7 +55,7 @@ class WeightNorm(Wrapper): def __init__(self, layer, data_init=False, **kwargs): if not isinstance(layer, Layer): raise ValueError( - 'Please initialize `WeightNorm` layer with a ' + 'Please initialize `WeightNormalization` layer with a ' '`Layer` instance. 
You passed: {input}'.format(input=layer)) if not context.executing_eagerly() and data_init: @@ -67,7 +67,7 @@ def __init__(self, layer, data_init=False, **kwargs): if data_init: self.initialized = False - super(WeightNorm, self).__init__(layer, **kwargs) + super(WeightNormalization, self).__init__(layer, **kwargs) self._track_checkpointable(layer, name='layer') def _compute_weights(self): @@ -114,7 +114,7 @@ def build(self, input_shape): if not hasattr(self.layer, 'kernel'): raise ValueError( - '`WeightNorm` must wrap a layer that' + '`WeightNormalization` must wrap a layer that' ' contains a `kernel` for weights' ) @@ -137,7 +137,7 @@ def build(self, input_shape): self.layer.built = True - super(WeightNorm, self).build() + super(WeightNormalization, self).build() self.built = True def call(self, inputs): diff --git a/tensorflow_addons/layers/python/wrappers_test.py b/tensorflow_addons/layers/python/wrappers_test.py index da418fcb3e..faa5e5b90f 100644 --- a/tensorflow_addons/layers/python/wrappers_test.py +++ b/tensorflow_addons/layers/python/wrappers_test.py @@ -29,12 +29,12 @@ from tensorflow.python import keras -class WeightNormTest(test.TestCase): +class WeightNormalizationTest(test.TestCase): @tf_test_util.run_all_in_graph_and_eager_modes def test_weightnorm_dense_train(self): model = keras.models.Sequential() - model.add(wrappers.WeightNorm( + model.add(wrappers.WeightNormalization( keras.layers.Dense(2), input_shape=(3, 4))) model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') @@ -48,7 +48,7 @@ def test_weightnorm_dense_train(self): @tf_test_util.run_all_in_graph_and_eager_modes def test_weightnorm_conv2d(self): model = keras.models.Sequential() - model.add(wrappers.WeightNorm( + model.add(wrappers.WeightNormalization( keras.layers.Conv2D(5, (2, 2), padding='same'), input_shape=(4, 4, 3))) @@ -63,7 +63,7 @@ def test_weightnorm_conv2d(self): @tf_test_util.run_all_in_graph_and_eager_modes def test_weight_norm_tflayers(self): images = random_ops.random_uniform((2, 4, 4, 3)) - wn_wrapper = wrappers.WeightNorm(layers.Conv2D(32, [2, 2]), + wn_wrapper = wrappers.WeightNormalization(layers.Conv2D(32, [2, 2]), input_shape=(4, 4, 3)) wn_wrapper.apply(images) self.assertTrue(hasattr(wn_wrapper.layer, 'g')) @@ -72,12 +72,12 @@ def test_weight_norm_tflayers(self): def test_weight_norm_nonlayer(self): images = random_ops.random_uniform((2, 4, 43)) with self.assertRaises(ValueError): - wrappers.WeightNorm(images) + wrappers.WeightNormalization(images) @tf_test_util.run_all_in_graph_and_eager_modes def test_weight_norm_nokernel(self): with self.assertRaises(ValueError): - wrappers.WeightNorm(layers.MaxPooling2D(2, 2)).build((2, 2)) + wrappers.WeightNormalization(layers.MaxPooling2D(2, 2)).build((2, 2)) if __name__ == "__main__": diff --git a/tensorflow_addons/optimizers/BUILD b/tensorflow_addons/optimizers/BUILD index 3ad427fd87..dff0f34c88 100644 --- a/tensorflow_addons/optimizers/BUILD +++ b/tensorflow_addons/optimizers/BUILD @@ -1,3 +1,27 @@ licenses(["notice"]) # Apache 2.0 package(default_visibility = ["//visibility:public"]) + +py_library( + name = "optimizers_py", + srcs = [ + "__init__.py", + "python/__init__.py", + "python/lazy_adam_optimizer.py", + ], + srcs_version = "PY2AND3", +) + + +py_test( + name = "lazy_adam_optimizer_test", + size = "small", + srcs = [ + "python/lazy_adam_optimizer_test.py" + ], + main = "python/lazy_adam_optimizer_test.py", + deps = [ + ":optimizers_py", + ], + srcs_version = "PY2AND3", +) diff --git 
a/tensorflow_addons/optimizers/python/lazy_adam_optimizer.py b/tensorflow_addons/optimizers/python/lazy_adam_optimizer.py new file mode 100644 index 0000000000..91e48085f3 --- /dev/null +++ b/tensorflow_addons/optimizers/python/lazy_adam_optimizer.py @@ -0,0 +1,81 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Variant of the Adam optimizer that handles sparse updates more efficiently. + +Compared with the original Adam optimizer, the one in this file can +provide a large improvement in model training throughput for some +applications. However, it provides slightly different semantics than the +original Adam algorithm, and may lead to different empirical results. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.keras.optimizer_v2 import adam +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops + + +class LazyAdamOptimizer(adam.Adam): + """Variant of the Adam optimizer that handles sparse updates more efficiently. + + The original Adam algorithm maintains two moving-average accumulators for + each trainable variable; the accumulators are updated at every step. + This class provides lazier handling of gradient updates for sparse variables. + It only updates moving-average accumulators for sparse variable indices that + appear in the current batch, rather than updating the accumulators for all + indices. Compared with the original Adam optimizer, it can provide large + improvements in model training throughput for some applications. However, it + provides slightly different semantics than the original Adam algorithm, and + may lead to different empirical results. + + Note, amsgrad is currently not supported and the argument can only be False. 
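A minimal usage sketch (assumptions: the module path added by this patch, plus a toy embedding model whose shapes and hyperparameters are purely illustrative). Sparse gradients arise because each batch looks up only a few embedding rows, so the optimizer touches only those rows of its `m`/`v` slots:

```python
import numpy as np
import tensorflow as tf
from tensorflow_addons.optimizers.python.lazy_adam_optimizer import LazyAdamOptimizer

# Toy embedding model: each batch looks up only a handful of the 10000 rows,
# so the embedding gradient is an IndexedSlices and LazyAdam updates the
# moving-average accumulators only for those rows.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(input_dim=10000, output_dim=16, input_length=8),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1),
])
model.compile(optimizer=LazyAdamOptimizer(learning_rate=0.001), loss='mse')
model.fit(np.random.randint(0, 10000, size=(32, 8)),
          np.random.rand(32, 1).astype("float32"),
          epochs=1, verbose=0)
```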
+ """ + + def _resource_apply_sparse(self, grad, var, indices): + var_dtype = var.dtype.base_dtype + lr_t = self._decayed_lr(var_dtype) + beta_1_t = self._get_hyper('beta_1', var_dtype) + beta_2_t = self._get_hyper('beta_2', var_dtype) + local_step = math_ops.cast(self.iterations + 1, var_dtype) + beta_1_power = math_ops.pow(beta_1_t, local_step) + beta_2_power = math_ops.pow(beta_2_t, local_step) + epsilon_t = self._get_hyper('epsilon', var_dtype) + lr = (lr_t * math_ops.sqrt(1 - beta_2_power) / (1 - beta_1_power)) + + # \\(m := beta1 * m + (1 - beta1) * g_t\\) + m = self.get_slot(var, "m") + m_t_slice = beta_1_t * array_ops.gather( + m, indices) + (1 - beta_1_t) * grad + m_update_op = resource_variable_ops.resource_scatter_update( + m.handle, indices, m_t_slice) + + # \\(v := beta2 * v + (1 - beta2) * (g_t * g_t)\\) + v = self.get_slot(var, "v") + v_t_slice = (beta_2_t * array_ops.gather(v, indices) + + (1 - beta_2_t) * math_ops.square(grad)) + v_update_op = resource_variable_ops.resource_scatter_update( + v.handle, indices, v_t_slice) + + # \\(variable -= learning_rate * m_t / (epsilon_t + sqrt(v_t))\\) + var_slice = lr * m_t_slice / (math_ops.sqrt(v_t_slice) + epsilon_t) + var_update_op = resource_variable_ops.resource_scatter_sub( + var.handle, indices, var_slice) + + return control_flow_ops.group( + *[var_update_op, m_update_op, v_update_op]) diff --git a/tensorflow_addons/optimizers/python/lazy_adam_optimizer_test.py b/tensorflow_addons/optimizers/python/lazy_adam_optimizer_test.py new file mode 100644 index 0000000000..6b7e034045 --- /dev/null +++ b/tensorflow_addons/optimizers/python/lazy_adam_optimizer_test.py @@ -0,0 +1,348 @@ +# Copyright 2019 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for LazyAdamOptimizer.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import resource_variable_ops +from tensorflow.python.ops import variables +from tensorflow.python.platform import test +from tensorflow_addons.optimizers.python import lazy_adam_optimizer + + +def adam_update_numpy(param, + g_t, + t, + m, + v, + lr=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-7): + lr_t = lr * np.sqrt(1 - beta2**(t + 1)) / (1 - beta1**(t + 1)) + + m_t = beta1 * m + (1 - beta1) * g_t + v_t = beta2 * v + (1 - beta2) * g_t * g_t + + param_t = param - lr_t * m_t / (np.sqrt(v_t) + epsilon) + return param_t, m_t, v_t + + +def get_beta_accumulators(opt, dtype): + local_step = math_ops.cast(opt.iterations + 1, dtype) + beta_1_t = math_ops.cast(opt._get_hyper("beta_1"), dtype) + beta_1_power = math_ops.pow(beta_1_t, local_step) + beta_2_t = math_ops.cast(opt._get_hyper("beta_2"), dtype) + beta_2_power = math_ops.pow(beta_2_t, local_step) + return (beta_1_power, beta_2_power) + + +class LazyAdamOptimizerTest(test.TestCase): + + # TODO: remove v1 tests (keep pace with adam_test.py in keras). + @test_util.run_deprecated_v1 + def testSparse(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.cached_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.0, 0.1], + dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.0, 0.01], + dtype=dtype.as_numpy_dtype) + + var0 = resource_variable_ops.ResourceVariable(var0_np) + var1 = resource_variable_ops.ResourceVariable(var1_np) + grads0_np_indices = np.array([0, 2], dtype=np.int32) + grads0 = ops.IndexedSlices( + constant_op.constant(grads0_np[grads0_np_indices]), + constant_op.constant(grads0_np_indices), + constant_op.constant([3])) + grads1_np_indices = np.array([0, 2], dtype=np.int32) + grads1 = ops.IndexedSlices( + constant_op.constant(grads1_np[grads1_np_indices]), + constant_op.constant(grads1_np_indices), + constant_op.constant([3])) + opt = lazy_adam_optimizer.LazyAdamOptimizer() + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 3.0, 4.0], self.evaluate(var1)) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + # Run 3 steps of Adam + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9**(t + 1), self.evaluate(beta_1_power)) + self.assertAllCloseAccordingToType( + 0.999**(t + 1), self.evaluate(beta_2_power)) + self.evaluate(update) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, + self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, + self.evaluate(var1)) + + @test_util.run_deprecated_v1 + def testSparseDevicePlacement(self): + for index_dtype in [dtypes.int32, dtypes.int64]: + with self.cached_session(force_gpu=test.is_gpu_available()): + # If a GPU is available, tests that all optimizer ops can be placed on + # it (i.e. they have GPU kernels). 
+ var = variables.Variable([[1.0], [2.0]]) + indices = constant_op.constant([0, 1], dtype=index_dtype) + g_sum = lambda: math_ops.reduce_sum(array_ops.gather(var, indices)) # pylint: disable=cell-var-from-loop + optimizer = lazy_adam_optimizer.LazyAdamOptimizer(3.0) + minimize_op = optimizer.minimize(g_sum, var_list=[var]) + self.evaluate(variables.global_variables_initializer()) + self.evaluate(minimize_op) + + @test_util.run_deprecated_v1 + def testSparseRepeatedIndices(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.cached_session(): + repeated_index_update_var = variables.Variable([[1.0], [2.0]], + dtype=dtype) + aggregated_update_var = variables.Variable([[1.0], [2.0]], + dtype=dtype) + grad_repeated_index = ops.IndexedSlices( + constant_op.constant([0.1, 0.1], + shape=[2, 1], + dtype=dtype), + constant_op.constant([1, 1]), + constant_op.constant([2, 1])) + grad_aggregated = ops.IndexedSlices( + constant_op.constant([0.2], shape=[1, 1], dtype=dtype), + constant_op.constant([1]), constant_op.constant([2, 1])) + repeated_update_opt = lazy_adam_optimizer.LazyAdamOptimizer() + repeated_update = repeated_update_opt.apply_gradients( + [(grad_repeated_index, repeated_index_update_var)]) + aggregated_update_opt = lazy_adam_optimizer.LazyAdamOptimizer() + aggregated_update = aggregated_update_opt.apply_gradients( + [(grad_aggregated, aggregated_update_var)]) + self.evaluate(variables.global_variables_initializer()) + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + for _ in range(3): + repeated_update.run() + aggregated_update.run() + self.assertAllClose(aggregated_update_var.eval(), + repeated_index_update_var.eval()) + + def doTestBasic(self, use_callable_params=False): + for i, dtype in enumerate( + [dtypes.half, dtypes.float32, dtypes.float64]): + with self.session(graph=ops.Graph()): + # Initialize variables for numpy implementation. 
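
Further down, this helper also covers callable hyperparameters: with
use_callable_params set, the learning rate is handed to the optimizer as a
zero-argument callable instead of a float, essentially

    opt = lazy_adam_optimizer.LazyAdamOptimizer(learning_rate=lambda: 0.001)

and both forms are expected to match the same NumPy reference.
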
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = resource_variable_ops.ResourceVariable( + var0_np, name="var0_%d" % i) + var1 = resource_variable_ops.ResourceVariable( + var1_np, name="var1_%d" % i) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + + learning_rate = lambda: 0.001 + beta1 = lambda: 0.9 + beta2 = lambda: 0.999 + epsilon = lambda: 1e-8 + if not use_callable_params: + learning_rate = learning_rate() + beta1 = beta1() + beta2 = beta2() + epsilon = epsilon() + + opt = lazy_adam_optimizer.LazyAdamOptimizer( + learning_rate=learning_rate) + if not context.executing_eagerly(): + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of Adam + for t in range(3): + beta_1_power, beta_2_power = get_beta_accumulators( + opt, dtype) + self.assertAllCloseAccordingToType( + 0.9**(t + 1), self.evaluate(beta_1_power)) + self.assertAllCloseAccordingToType( + 0.999**(t + 1), self.evaluate(beta_2_power)) + if not context.executing_eagerly(): + self.evaluate(update) + else: + opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, + self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, + self.evaluate(var1)) + self.assertEqual("var0_%d/m:0" % (i, ), + opt.get_slot(var0, "m").name) + + @test_util.run_in_graph_and_eager_modes(reset_test=True) + def testResourceBasic(self): + self.doTestBasic() + + def testBasicCallableParams(self): + with context.eager_mode(): + self.doTestBasic(use_callable_params=True) + + @test_util.run_deprecated_v1 + def testTensorLearningRate(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.cached_session(): + # Initialize variables for numpy implementation. 
+ m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = lazy_adam_optimizer.LazyAdamOptimizer( + constant_op.constant(0.001)) + update = opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], var0.eval()) + self.assertAllClose([3.0, 4.0], var1.eval()) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + # Run 3 steps of Adam + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9**(t + 1), self.evaluate(beta_1_power)) + self.assertAllCloseAccordingToType( + 0.999**(t + 1), self.evaluate(beta_2_power)) + self.evaluate(update) + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, + self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, + self.evaluate(var1)) + + @test_util.run_deprecated_v1 + def testSharing(self): + for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: + with self.cached_session(): + # Initialize variables for numpy implementation. + m0, v0, m1, v1 = 0.0, 0.0, 0.0, 0.0 + var0_np = np.array([1.0, 2.0], dtype=dtype.as_numpy_dtype) + grads0_np = np.array([0.1, 0.1], dtype=dtype.as_numpy_dtype) + var1_np = np.array([3.0, 4.0], dtype=dtype.as_numpy_dtype) + grads1_np = np.array([0.01, 0.01], dtype=dtype.as_numpy_dtype) + + var0 = variables.Variable(var0_np) + var1 = variables.Variable(var1_np) + grads0 = constant_op.constant(grads0_np) + grads1 = constant_op.constant(grads1_np) + opt = lazy_adam_optimizer.LazyAdamOptimizer() + update1 = opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + update2 = opt.apply_gradients( + zip([grads0, grads1], [var0, var1])) + self.evaluate(variables.global_variables_initializer()) + + beta_1_power, beta_2_power = get_beta_accumulators(opt, dtype) + + # Fetch params to validate initial values + self.assertAllClose([1.0, 2.0], self.evaluate(var0)) + self.assertAllClose([3.0, 4.0], self.evaluate(var1)) + + # Run 3 steps of intertwined Adam1 and Adam2. + for t in range(3): + self.assertAllCloseAccordingToType( + 0.9**(t + 1), self.evaluate(beta_1_power)) + self.assertAllCloseAccordingToType( + 0.999**(t + 1), self.evaluate(beta_2_power)) + if t % 2 == 0: + update1.run() + else: + update2.run() + + var0_np, m0, v0 = adam_update_numpy( + var0_np, grads0_np, t, m0, v0) + var1_np, m1, v1 = adam_update_numpy( + var1_np, grads1_np, t, m1, v1) + + # Validate updated params + self.assertAllCloseAccordingToType(var0_np, + self.evaluate(var0)) + self.assertAllCloseAccordingToType(var1_np, + self.evaluate(var1)) + + def testSlotsUniqueEager(self): + with context.eager_mode(): + v1 = resource_variable_ops.ResourceVariable(1.) + v2 = resource_variable_ops.ResourceVariable(1.) + opt = lazy_adam_optimizer.LazyAdamOptimizer(1.) + opt.minimize(lambda: v1 + v2, var_list=[v1, v2]) + # There should be iteration, and two unique slot variables for v1 and v2. 
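
The count asserted just below follows from the optimizer's bookkeeping: one
iterations counter plus an m and a v slot for each of the two variables,

    expected = 1 + 2 * 2   # iterations + (m, v) per variable = 5

so five distinct optimizer variables in total.
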
+ self.assertEqual(5, len(set(opt.variables()))) + self.assertEqual( + self.evaluate(opt.variables()[0]), + self.evaluate(opt.iterations)) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow_addons/text/cc/kernels/skip_gram_kernels.cc b/tensorflow_addons/text/cc/kernels/skip_gram_kernels.cc index c75b98a924..7480177985 100644 --- a/tensorflow_addons/text/cc/kernels/skip_gram_kernels.cc +++ b/tensorflow_addons/text/cc/kernels/skip_gram_kernels.cc @@ -47,11 +47,17 @@ class SkipGramGenerateCandidatesOp : public OpKernel { OP_REQUIRES_OK(context, context->input("max_skips", &max_skips_tensor)); const int max_skips = *(max_skips_tensor->scalar().data()); + const Tensor& input_check = context->input(0); + OP_REQUIRES( + context, TensorShapeUtils::IsVector(input_check.shape()), + errors::InvalidArgument("input_tensor must be of rank 1")); + OP_REQUIRES( context, min_skips >= 0 && max_skips >= 0, errors::InvalidArgument("Both min_skips and max_skips must be >= 0.")); - OP_REQUIRES(context, min_skips <= max_skips, - errors::InvalidArgument("min_skips must be <= max_skips.")); + OP_REQUIRES( + context, min_skips <= max_skips, + errors::InvalidArgument("min_skips must be <= max_skips.")); const Tensor* start_tensor; OP_REQUIRES_OK(context, context->input("start", &start_tensor)); diff --git a/tensorflow_addons/text/python/skip_gram_ops_test.py b/tensorflow_addons/text/python/skip_gram_ops_test.py index 8f3a578c55..01bf5da7de 100644 --- a/tensorflow_addons/text/python/skip_gram_ops_test.py +++ b/tensorflow_addons/text/python/skip_gram_ops_test.py @@ -265,15 +265,11 @@ def test_skip_gram_sample_errors(self): text.skip_gram_sample(input_tensor, min_skips=min_skips, max_skips=max_skips) - ######################################### - - # FIXME: Why is this not failing? - # with self.assertRaises(ValueError): - # invalid_tensor = constant_op.constant([[b"the"], [b"quick"], - # [b"brown"]]) - # text.skip_gram_sample(invalid_tensor) - - ######################################### + # Eager tensor must be rank 1 + with self.assertRaises(errors.InvalidArgumentError): + invalid_tensor = constant_op.constant([[b"the"], [b"quick"], + [b"brown"]]) + text.skip_gram_sample(invalid_tensor) # vocab_freq_table must be provided if vocab_min_count, # vocab_subsampling, or corpus_size is specified. @@ -479,7 +475,7 @@ def _text_vocab_subsample_vocab_helper(self, vocab_freq_file, vocab_freq_file=vocab_freq_file, vocab_token_index=0, vocab_freq_index=1, - vocab_freq_dtype=vocab_freq_dtype, + vocab_freq_dtype=dtypes.float64, vocab_min_count=vocab_min_count, vocab_subsampling=0.05, corpus_size=corpus_size, From d5beb61aeb9e7a06385a556dd0c33f9900856fb2 Mon Sep 17 00:00:00 2001 From: Moritz Date: Sun, 10 Feb 2019 23:27:54 +0100 Subject: [PATCH 06/26] added function for easy testing. 
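
The helper added in this commit wraps the layer under test in a small
Sequential model and fits it on random data, so the individual checks
reduce to roughly the following (an illustrative condensation of the diff
below, not extra test code):

    # groups=1 behaves like LayerNormalization, groups=-1 like
    # InstanceNormalization, and anything in between is a true group norm.
    model = create_and_fit_Sequential_model(
        GroupNormalization(input_shape=(64,), groups=16))
    assert hasattr(model.layers[0], 'gamma')
    assert hasattr(model.layers[0], 'beta')
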
--- .../layers/python/normalizations_test.py | 33 +++++++++++-------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index ed5f831955..2b29fc97f7 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -23,6 +23,19 @@ from tensorflow.python.framework import test_util as tf_test_util +def create_and_fit_Sequential_model(layer): + model=keras.models.Sequential() + model.add(layer) + model.add(keras.layers.Dense(32)) + + model.compile(optimizer=RMSPropOptimizer(0.01),loss="mse") + layer_shape=(10,)+layer.input_shape[1:] + print(type(layer_shape)) + input_batch=np.random.random_sample(size=layer_shape) + model.fit(input_batch, + epochs=1, + batch_size=5) + return model class normalization_test(test.TestCase): @tf_test_util.run_all_in_graph_and_eager_modes @@ -32,30 +45,23 @@ def test_weights(self): self.assertEqual(len(layer.trainable_weights), 0) self.assertEqual(len(layer.weights), 0) - layer = keras.layers.LayerNormalization() + layer = LayerNormalization() layer.build((None, 3, 4)) self.assertEqual(len(layer.trainable_weights), 2) self.assertEqual(len(layer.weights), 2) - - + layer = InstanceNormalization() + layer.build((None, 3, 4)) + self.assertEqual(len(layer.trainable_weights),2) + self.assertEqual(len(layer.weights),2) @tf_test_util.run_all_in_graph_and_eager_modes def test_groupnorm_flat(self): # Testing for 1 == LayerNorm, 16 == GroupNorm, -1 == InstanceNorm groups=[-1,16,1] for i in groups: - model=keras.models.Sequential() - model.add(GroupNormalization( - input_shape=(32,),groups=i)) - model.add(keras.layers.Dense(32)) - model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') - model.fit( - np.random.random((10,32)), - np.random.random((10,32)), - epochs=1, - batch_size=10) + model=create_and_fit_Sequential_model(GroupNormalization(input_shape=(64,),groups=i)) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) @@ -76,6 +82,7 @@ def test_groupnorm_conv(self): model.fit(np.random.random((10,20, 20, 3))) self.assertTrue(hasattr(model.layers[0], 'gamma')) + """ class LayerNormalizationTest(keras_parameterized.TestCase): From 7a361ce5ccd50ef543827a143883d2827a7113b7 Mon Sep 17 00:00:00 2001 From: Moritz Date: Sun, 10 Feb 2019 23:30:17 +0100 Subject: [PATCH 07/26] clean up --- .../layers/python/normalizations_test.py | 176 ------------------ 1 file changed, 176 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index 2b29fc97f7..1b42340fd4 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -83,181 +83,5 @@ def test_groupnorm_conv(self): self.assertTrue(hasattr(model.layers[0], 'gamma')) -""" -class LayerNormalizationTest(keras_parameterized.TestCase): - - - @tf_test_util.run_in_graph_and_eager_modes - def test_layernorm_regularization(self): - layer = keras.layers.LayerNormalization( - gamma_regularizer='l1', beta_regularizer='l1') - layer.build((None, 3, 4)) - self.assertEqual(len(layer.losses), 2) - max_norm = keras.constraints.max_norm - layer = keras.layers.LayerNormalization( - gamma_constraint=max_norm, beta_constraint=max_norm) - layer.build((None, 3, 4)) - self.assertEqual(layer.gamma.constraint, max_norm) - self.assertEqual(layer.beta.constraint, max_norm) - 
- @keras_parameterized.run_all_keras_modes - def test_layernorm_convnet(self): - if test.is_gpu_available(cuda_only=True): - with self.session(use_gpu=True): - model = keras.models.Sequential() - norm = keras.layers.LayerNormalization(input_shape=(3, 4, 4)) - model.add(norm) - model.compile(loss='mse', - optimizer=gradient_descent.GradientDescentOptimizer(0.01), - run_eagerly=testing_utils.should_run_eagerly()) - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - out -= np.reshape(keras.backend.eval(norm.beta), (1, 3, 1, 1)) - out /= np.reshape(keras.backend.eval(norm.gamma), (1, 3, 1, 1)) - - np.testing.assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) - np.testing.assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) - - @keras_parameterized.run_all_keras_modes - def test_layernorm_convnet_channel_last(self): - model = keras.models.Sequential() - norm = keras.layers.LayerNormalization(input_shape=(4, 4, 3)) - model.add(norm) - model.compile(loss='mse', - optimizer=gradient_descent.GradientDescentOptimizer(0.01), - run_eagerly=testing_utils.should_run_eagerly()) - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 4, 4, 3)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - out -= np.reshape(keras.backend.eval(norm.beta), (1, 1, 1, 3)) - out /= np.reshape(keras.backend.eval(norm.gamma), (1, 1, 1, 3)) - - np.testing.assert_allclose(np.mean(out, axis=(0, 1, 2)), 0.0, atol=1e-1) - np.testing.assert_allclose(np.std(out, axis=(0, 1, 2)), 1.0, atol=1e-1) - - @keras_parameterized.run_all_keras_modes - def test_layernorm_correctness(self): - _run_layernorm_correctness_test( - normalization.LayerNormalization, dtype='float32') - - @keras_parameterized.run_all_keras_modes - def test_layernorm_mixed_precision(self): - _run_layernorm_correctness_test( - normalization.LayerNormalization, dtype='float16') - - def doOutputTest(self, - input_shape, - tol=1e-5, - norm_axis=None, - params_axis=-1, - dtype=None): - ndim = len(input_shape) - if norm_axis is None: - moments_axis = range(1, ndim) - elif isinstance(norm_axis, int): - if norm_axis < 0: - moments_axis = [norm_axis + ndim] - else: - moments_axis = [norm_axis] - else: - moments_axis = [] - for dim in norm_axis: - if dim < 0: - dim = dim + ndim - moments_axis.append(dim) - - moments_axis = tuple(moments_axis) - expected_shape = [] - for i in range(ndim): - if i not in moments_axis: - expected_shape.append(input_shape[i]) - - expected_mean = np.zeros(expected_shape) - expected_var = np.ones(expected_shape) - for mu in [0.0, 1e2]: - for sigma in [1.0, 0.1]: - inputs = np.random.randn(*input_shape) * sigma + mu - inputs_t = constant_op.constant(inputs, shape=input_shape) - layer = normalization.LayerNormalization( - norm_axis=norm_axis, params_axis=params_axis, dtype=dtype) - outputs = layer(inputs_t) - beta = layer.beta - gamma = layer.gamma - for weight in layer.weights: - self.evaluate(weight.initializer) - outputs = self.evaluate(outputs) - beta = self.evaluate(beta) - gamma = self.evaluate(gamma) - - # The mean and variance of the output should be close to 0 and 1 - # respectively. 
- - # Make sure that there are no NaNs - self.assertFalse(np.isnan(outputs).any()) - mean = np.mean(outputs, axis=moments_axis) - var = np.var(outputs, axis=moments_axis) - # Layer-norm implemented in numpy - eps = 1e-12 - expected_out = ( - (gamma * (inputs - np.mean( - inputs, axis=moments_axis, keepdims=True)) / - np.sqrt(eps + np.var( - inputs, axis=moments_axis, keepdims=True))) + beta) - self.assertAllClose(expected_mean, mean, atol=tol, rtol=tol) - self.assertAllClose(expected_var, var, atol=tol) - # The full computation gets a bigger tolerance - self.assertAllClose(expected_out, outputs, atol=5 * tol) - - @tf_test_util.run_in_graph_and_eager_modes - def testOutput2DInput(self): - self.doOutputTest((10, 300)) - self.doOutputTest((10, 300), norm_axis=[0]) - self.doOutputTest((10, 300), params_axis=[0, 1]) - - @tf_test_util.run_in_graph_and_eager_modes - def testOutput2DInputDegenerateNormAxis(self): - with self.assertRaisesRegexp(ValueError, r'Invalid axis: 2'): - self.doOutputTest((10, 300), norm_axis=2) - - @tf_test_util.run_in_graph_and_eager_modes - def testOutput4DInput(self): - self.doOutputTest((100, 10, 10, 3)) - - @tf_test_util.run_in_graph_and_eager_modes - def testOutput4DInputNormOnInnermostAxis(self): - # Equivalent tests - shape = (100, 10, 10, 3) - self.doOutputTest( - shape, norm_axis=list(range(3, len(shape))), tol=1e-4, dtype='float64') - self.doOutputTest(shape, norm_axis=-1, tol=1e-4, dtype='float64') - - @tf_test_util.run_in_graph_and_eager_modes - def testOutputSmallInput(self): - self.doOutputTest((10, 10, 10, 30)) - - @tf_test_util.run_in_graph_and_eager_modes - def testOutputSmallInputNormOnInnermostAxis(self): - self.doOutputTest((10, 10, 10, 30), norm_axis=3) - - @tf_test_util.run_in_graph_and_eager_modes - def testOutputSmallInputNormOnMixedAxes(self): - self.doOutputTest((10, 10, 10, 30), norm_axis=[0, 3]) - self.doOutputTest((10, 10, 10, 30), params_axis=[-2, -1]) - self.doOutputTest((10, 10, 10, 30), norm_axis=[0, 3], - params_axis=[-3, -2, -1]) - - @tf_test_util.run_in_graph_and_eager_modes - def testOutputBigInput(self): - self.doOutputTest((1, 100, 100, 1)) - self.doOutputTest((1, 100, 100, 1), norm_axis=[1, 2]) - self.doOutputTest((1, 100, 100, 1), norm_axis=[1, 2], - params_axis=[-2, -1]) - -""" if __name__ == "__main__": test.main() From 2c174fc7f037871a7022a612f8e0e769008fe2e6 Mon Sep 17 00:00:00 2001 From: smokrow Date: Wed, 13 Feb 2019 22:09:39 +0100 Subject: [PATCH 08/26] found bug in BUILD File --- tensorflow_addons/layers/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow_addons/layers/BUILD b/tensorflow_addons/layers/BUILD index 1b1ae10b9d..cad1dc0905 100644 --- a/tensorflow_addons/layers/BUILD +++ b/tensorflow_addons/layers/BUILD @@ -4,7 +4,7 @@ package(default_visibility = ["//visibility:public"]) py_library( name = "layers_py", - srcs = [ + srcs = ([ "__init__.py", "python/__init__.py", "python/maxout.py", From 7c32461cc16abbc6189d1db757c75f9e6dcf5591 Mon Sep 17 00:00:00 2001 From: Smokrow Date: Sun, 17 Feb 2019 23:55:23 +0100 Subject: [PATCH 09/26] fixed signature bug and added tests --- .../layers/python/normalizations.py | 1 + .../layers/python/normalizations_test.py | 79 +++++++++++++------ 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 52251557e8..01206b5dcf 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ 
-137,6 +137,7 @@ def build(self, input_shape): else: self.beta = None self.built = True + super(GroupNormalization, self).build(input_shape) def call(self, inputs): input_shape = K.int_shape(inputs) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index 1b42340fd4..2e1035b34f 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -15,31 +15,33 @@ from tensorflow_addons.layers.python.normalizations import GroupNormalization,LayerNormalization,InstanceNormalization import numpy as np +import scipy as scipy import tensorflow as tf -from tensorflow.python import keras as keras +from tensorflow import keras as keras from tensorflow.python.training.rmsprop import RMSPropOptimizer - from tensorflow.python.platform import test from tensorflow.python.framework import test_util as tf_test_util -def create_and_fit_Sequential_model(layer): +def create_and_fit_Sequential_model(layer,shape): + #Helperfunction for quick evaluation model=keras.models.Sequential() model.add(layer) model.add(keras.layers.Dense(32)) + model.add(keras.layers.Dense(1)) - model.compile(optimizer=RMSPropOptimizer(0.01),loss="mse") - layer_shape=(10,)+layer.input_shape[1:] - print(type(layer_shape)) - input_batch=np.random.random_sample(size=layer_shape) - model.fit(input_batch, - epochs=1, - batch_size=5) + model.compile(optimizer=RMSPropOptimizer(0.01),loss="categorical_crossentropy") + layer_shape=(10,)+shape + input_batch=np.random.rand(*layer_shape) + output_batch=np.random.rand(*(10,1)) + model.fit(x=input_batch,y=output_batch, epochs=1, batch_size=1) return model + + class normalization_test(test.TestCase): - @tf_test_util.run_all_in_graph_and_eager_modes def test_weights(self): + #Check if weights get initialized layer = GroupNormalization(groups=1,scale=False, center=False) layer.build((None, 3, 4)) self.assertEqual(len(layer.trainable_weights), 0) @@ -55,31 +57,62 @@ def test_weights(self): self.assertEqual(len(layer.trainable_weights),2) self.assertEqual(len(layer.weights),2) - @tf_test_util.run_all_in_graph_and_eager_modes + def test_groupnorm_flat(self): + #Check basic usage of groupnorm_flat # Testing for 1 == LayerNorm, 16 == GroupNorm, -1 == InstanceNorm groups=[-1,16,1] + shape=(64,) for i in groups: - - model=create_and_fit_Sequential_model(GroupNormalization(input_shape=(64,),groups=i)) + model=create_and_fit_Sequential_model(GroupNormalization(groups=i),shape) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) - @tf_test_util.run_all_in_graph_and_eager_modes + + def test_layernorm_flat(self): + # Check basic usage of layernorm + model=create_and_fit_Sequential_model(LayerNormalization(),(64,)) + self.assertTrue(hasattr(model.layers[0],'gamma')) + self.assertTrue(hasattr(model.layers[0],'beta')) + + + def test_instancenorm_flat(self): + # Check basic usage of instancenorm + model=create_and_fit_Sequential_model(InstanceNormalization(),(64,)) + self.assertTrue(hasattr(model.layers[0],'gamma')) + self.assertTrue(hasattr(model.layers[0],'beta')) + + + def test_initializer(self): + # Check if the initializer for gamma and beta is working correctly + + model=create_and_fit_Sequential_model(GroupNormalization(groups=32, + beta_initializer='random_normal', + beta_constraint='NonNeg', + gamma_initializer='random_normal', + gamma_constraint='NonNeg'), + (64,)) + + weights=np.array(model.layers[0].get_weights()) + 
negativ=weights[weights<0.0] + + self.assertTrue(len(weights)==0) + + def test_groupnorm_conv(self): + # Check if Axis is working for CONV nets # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm - #groups=[1,5,-1] - groups=[1] + groups=[-1,5,1] for i in groups: - model = keras.models.Sequential() - model.add(GroupNormalization( - input_shape=(20,20,3,),groups=i)) - + model.add(GroupNormalization(axis=1,groups=i,input_shape=(20,20,3))) model.add(keras.layers.Conv2D(5, (1, 1), padding='same')) - + model.add(keras.layers.Flatten()) + model.add(keras.layers.Dense(1,activation='softmax')) model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') - model.fit(np.random.random((10,20, 20, 3))) + x=np.random.randint(1000,size=(10,20, 20, 3)) + y=np.random.randint(1000,size=(10,1)) + a=model.fit(x=x,y=y,epochs=1) self.assertTrue(hasattr(model.layers[0], 'gamma')) From 0b041622829386ceef0d2e3842b53a12b2973960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Kr=C3=B6ger?= Date: Mon, 18 Feb 2019 00:19:31 +0100 Subject: [PATCH 10/26] Update maxout.py --- tensorflow_addons/layers/python/maxout.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow_addons/layers/python/maxout.py b/tensorflow_addons/layers/python/maxout.py index 55ffe57ebc..de7c697010 100644 --- a/tensorflow_addons/layers/python/maxout.py +++ b/tensorflow_addons/layers/python/maxout.py @@ -27,6 +27,7 @@ @keras_utils.register_keras_custom_object +class Maxout(Layer): """Applies Maxout to the input. "Maxout Networks" Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron From 095d91ee4247f84f53cf069a910ce5edba25d500 Mon Sep 17 00:00:00 2001 From: Smokrow Date: Mon, 18 Feb 2019 00:30:00 +0100 Subject: [PATCH 11/26] small change to variable name --- tensorflow_addons/layers/python/normalizations_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index 2e1035b34f..a68f4c8ef1 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -95,8 +95,9 @@ def test_initializer(self): weights=np.array(model.layers[0].get_weights()) negativ=weights[weights<0.0] - - self.assertTrue(len(weights)==0) + print("------------------------------------------------------") + print(negativ) + self.assertTrue(len(negativ)==0) def test_groupnorm_conv(self): From 3b6d4e66c5694660a7d2d0d4cadcef1fa666dd4f Mon Sep 17 00:00:00 2001 From: smokrow Date: Sun, 24 Feb 2019 12:58:57 +0100 Subject: [PATCH 12/26] cleaned BUILD file --- tensorflow_addons/layers/BUILD | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/tensorflow_addons/layers/BUILD b/tensorflow_addons/layers/BUILD index cad1dc0905..caad2f5a22 100644 --- a/tensorflow_addons/layers/BUILD +++ b/tensorflow_addons/layers/BUILD @@ -8,15 +8,16 @@ py_library( "__init__.py", "python/__init__.py", "python/maxout.py", + "python/normalizations.py", "python/poincare.py", - "python/wrappers.py", - "python/normalizations.py" + "python/wrappers.py" ]), srcs_version = "PY2AND3", ) py_test( name = "layers_wrappers_py_test", + size= "small", srcs = [ "python/wrappers_test.py", ], @@ -40,24 +41,15 @@ py_test( ], ) -py_test( - name = "poincare_py_test", - size = "small", - srcs = [ - "python/poincare_test.py", - ], - main = "python/poincare_test.py", - srcs_version = "PY2AND3", -) - py_test( name = "layers_normalizations_py_test", + size= "small", srcs = [ 
"python/normalizations_test.py", ], main = "python/normalizations_test.py", + srcs_version = "PY2AND3", deps = [ ":layers_py", ], - srcs_version = "PY2AND3", ) From b288cca37e78447ab543448e2b5beeff6efcd791 Mon Sep 17 00:00:00 2001 From: smokrow Date: Sun, 24 Feb 2019 13:09:52 +0100 Subject: [PATCH 13/26] cleaned docstring --- .../layers/python/normalizations.py | 46 +++++++++++++------ 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 52251557e8..32a69d6528 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -20,22 +20,26 @@ from tensorflow.keras import backend as K from tensorflow.python.ops import nn + class GroupNormalization(Layer): """Group normalization layer. + Group Normalization divides the channels into groups and computes - within each group - the mean and variance for normalization. + within each group the mean and variance for normalization. Group Normalization's computation is independent - of batch sizes, and its accuracy is stable in a wide range of batch sizes. + of batch sizes, and its accuracy is stable in a wide range of batch sizes. + Relation to Layer Normalization: If the number of groups is set to 1, then this operation becomes identical to Layer Normalization. + Relation to Instance Normalization: If the number of groups is set to the input dimension (number of groups is equal to number of channels), then this operation becomes identical to Instance Normalization. - # Arguments + + Arguments groups: Integer, the number of groups for Group Normalization. Can be in the range [1, N] where N is the input dimension. The input dimension must be divisible by the number of groups. @@ -58,13 +62,15 @@ class GroupNormalization(Layer): gamma_regularizer: Optional regularizer for the gamma weight. beta_constraint: Optional constraint for the beta weight. gamma_constraint: Optional constraint for the gamma weight. - # Input shape + + Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. - # Output shape + + Output shape Same shape as input. - # References + References - [Group Normalization](https://arxiv.org/abs/1803.08494) """ @@ -205,11 +211,13 @@ def compute_output_shape(self, input_shape): class LayerNormalization(GroupNormalization): """Layer normalization layer. + Layer Normalization is an specific case of ```GroupNormalization```since it normalizes all features of a layer. The Groupsize is 1. Layer Normalization's computation is independent of batch sizes, and its accuracy is stable in a wide range of batch sizes. - # Arguments + + Arguments axis: Integer, the axis that should be normalized (typically the features axis). For instance, after a `Conv2D` layer with @@ -229,13 +237,16 @@ class LayerNormalization(GroupNormalization): gamma_regularizer: Optional regularizer for the gamma weight. beta_constraint: Optional constraint for the beta weight. gamma_constraint: Optional constraint for the gamma weight. - # Input shape + + Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. - # Output shape + + Output shape Same shape as input. 
- # References + + References - [Layer Normalization](https://arxiv.org/abs/1607.06450) """ def __init__(self,**kwargs): @@ -244,11 +255,13 @@ def __init__(self,**kwargs): class InstanceNormalization(GroupNormalization): """Instance normalization layer. + Instance Normalization is an specific case of ```GroupNormalization```since it normalizes all features of one channel. The Groupsize is equal to the channel size. Instance Normalization's computation is independent of batch sizes, and its accuracy is stable in a wide range of batch sizes. - # Arguments + + Arguments axis: Integer, the axis that should be normalized (typically the features axis). For instance, after a `Conv2D` layer with @@ -268,13 +281,16 @@ class InstanceNormalization(GroupNormalization): gamma_regularizer: Optional regularizer for the gamma weight. beta_constraint: Optional constraint for the beta weight. gamma_constraint: Optional constraint for the gamma weight. - # Input shape + + Input shape Arbitrary. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. - # Output shape + + Output shape Same shape as input. - # References + + References - [Layer Normalization](https://arxiv.org/abs/1607.06450) """ def __init__(self,**kwargs): From b7e3d779bb117b43afc908a5a573d80515b9bc8d Mon Sep 17 00:00:00 2001 From: smokrow Date: Sun, 24 Feb 2019 18:10:42 +0100 Subject: [PATCH 14/26] did some refactoring --- .../layers/python/normalizations.py | 94 +++++++++++++------ 1 file changed, 67 insertions(+), 27 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 32a69d6528..914e56b29b 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -15,9 +15,12 @@ # Orginal implementation from keras_contrib/layer/normalization -from tensorflow.keras.layers import Layer, InputSpec -from tensorflow.keras import initializers, regularizers, constraints from tensorflow.keras import backend as K +from tensorflow.keras import constraints +from tensorflow.keras import initializers +from tensorflow.keras import regularizers +from tensorflow.keras.layers import InputSpec +from tensorflow.keras.layers import Layer from tensorflow.python.ops import nn @@ -102,16 +105,35 @@ def __init__(self, self.gamma_constraint = constraints.get(gamma_constraint) def build(self, input_shape): - dim = input_shape[self.axis] + self._check_if_input_shape_is_None(input_shape) + self._set_number_of_groups_for_instance_norm(input_shape) + self._check_size_of_dimensions(input_shape) + self._create_input_spec(input_shape) + + self._add_gamma_weight(input_shape) + self._add_beta_weight(input_shape) + self.built = True + + def _check_if_input_shape_is_None(self, input_shape): + dim = input_shape[self.axis] if dim is None: raise ValueError('Axis ' + str(self.axis) + ' of ' 'input tensor should have a defined dimension ' 'but the layer received an input with shape ' + str(input_shape) + '.') + + + def _set_number_of_groups_for_instance_norm(self, input_shape): + dim=input_shape[self.axis] + if self.groups==-1: self.groups=dim + + def _check_size_of_dimensions(self,input_shape): + + dim=input_shape[self.axis] if dim < self.groups: raise ValueError('Number of groups (' + str(self.groups) + ') cannot be ' 'more than the number of channels (' + @@ -122,9 +144,18 @@ def build(self, input_shape): 'multiple of the number of channels (' + str(dim) + 
').') + + def _create_input_spec(self,input_shape): + + dim=input_shape[self.axis] self.input_spec = InputSpec(ndim=len(input_shape), axes={self.axis: dim}) - shape = (dim,) + + + def _add_gamma_weight(self,input_shape): + + dim=input_shape[self.axis] + shape=(dim,) if self.scale: self.gamma = self.add_weight(shape=shape, @@ -134,6 +165,12 @@ def build(self, input_shape): constraint=self.gamma_constraint) else: self.gamma = None + + def _add_beta_weight(self,input_shape): + + dim=input_shape[self.axis] + shape=(dim,) + if self.center: self.beta = self.add_weight(shape=shape, name='beta', @@ -142,47 +179,50 @@ def build(self, input_shape): constraint=self.beta_constraint) else: self.beta = None - self.built = True - def call(self, inputs): - input_shape = K.int_shape(inputs) - tensor_input_shape = K.shape(inputs) - # Prepare broadcasting shape. - reduction_axes = list(range(len(input_shape))) - del reduction_axes[self.axis] + def _create_broadcast_shape(self,input_shape): broadcast_shape = [1] * len(input_shape) broadcast_shape[self.axis] = input_shape[self.axis] // self.groups broadcast_shape.insert(1, self.groups) + return broadcast_shape + - reshape_group_shape = K.shape(inputs) - group_axes = [reshape_group_shape[i] for i in range(len(input_shape))] + def _create_group_shape(self,input_shape): + + group_axes = [tensor_input_shape[i] for i in range(len(input_shape))] group_axes[self.axis] = input_shape[self.axis] // self.groups group_axes.insert(1, self.groups) # reshape inputs to new group shape group_shape = [group_axes[0], self.groups] + group_axes[2:] group_shape = K.stack(group_shape) - inputs = K.reshape(inputs, group_shape) + return group_shape + + + def call(self, inputs): + input_shape = K.int_shape(inputs) + tensor_input_shape = K.shape(inputs) + + reshaped_inputs = K.reshape(inputs, group_shape) group_reduction_axes = list(range(len(group_axes))) mean, variance = nn.moments(inputs, group_reduction_axes[2:], keep_dims=True) inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon)) - # prepare broadcast shape - inputs = K.reshape(inputs, group_shape) - - outputs = inputs - - # In this case we must explicitly broadcast all parameters. - if self.scale: - broadcast_gamma = K.reshape(self.gamma, broadcast_shape) - outputs = outputs * broadcast_gamma - - if self.center: - broadcast_beta = K.reshape(self.beta, broadcast_shape) - outputs = outputs + broadcast_beta + outputs = K.reshape(inputs, group_shape) + + if self.scale or self.center: + broadcast_shape=self._create_broadcast_shape(input_shape) + # In this case we must explicitly broadcast all parameters. 
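
For the default axis=-1 case, the shapes produced by _create_broadcast_shape
and the group reshape above line up as follows; the concrete numbers here
are made up for illustration:

    import numpy as np
    N, H, W, C, G = 2, 4, 4, 6, 3
    grouped = np.zeros((N, G, H, W, C // G))   # inputs after the group reshape
    gamma = np.ones((1, G, 1, 1, C // G))      # gamma reshaped to broadcast_shape
    print((grouped * gamma).shape)             # (2, 3, 4, 4, 2)

so the per-channel weights broadcast over the batch and spatial axes while
following the (groups, channels-per-group) split.
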
+ if self.scale: + broadcast_gamma = K.reshape(self.gamma, broadcast_shape) + outputs = outputs * broadcast_gamma + + if self.center: + broadcast_beta = K.reshape(self.beta, broadcast_shape) + outputs = outputs + broadcast_beta # finally we reshape the output back to the input shape outputs = K.reshape(outputs, tensor_input_shape) From 55cb1580129f3da22f5ccf4f36dfd9a280b3fd14 Mon Sep 17 00:00:00 2001 From: smokrow Date: Tue, 26 Feb 2019 10:44:03 +0100 Subject: [PATCH 15/26] refactored call function --- .../layers/python/normalizations.py | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 914e56b29b..ea363aeffb 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -188,41 +188,42 @@ def _create_broadcast_shape(self,input_shape): return broadcast_shape - def _create_group_shape(self,input_shape): + def _reshape_into_groups(self,input_shape): - group_axes = [tensor_input_shape[i] for i in range(len(input_shape))] - group_axes[self.axis] = input_shape[self.axis] // self.groups - group_axes.insert(1, self.groups) - - # reshape inputs to new group shape - group_shape = [group_axes[0], self.groups] + group_axes[2:] + group_shape = [tensor_input_shape[i] for i in range(len(input_shape))] + group_shape[self.axis] = input_shape[self.axis] // self.groups + group_shape.insert(1, self.groups) group_shape = K.stack(group_shape) - return group_shape + reshaped_inputs = K.reshape(inputs, group_shape) + return reshaped_inputs, group_shape + def _apply_scale_or_center(self,inputs, input_shape): + broadcast_shape=self._create_broadcast_shape(input_shape) + if self.scale: + broadcast_gamma = K.reshape(self.gamma, broadcast_shape) + outputs = outputs * broadcast_gamma + + if self.center: + broadcast_beta = K.reshape(self.beta, broadcast_shape) + outputs = outputs + broadcast_beta + return outputs def call(self, inputs): + input_shape = K.int_shape(inputs) tensor_input_shape = K.shape(inputs) - reshaped_inputs = K.reshape(inputs, group_shape) + reshaped_inputs, group_shape=self._reshape_into_groups(input_shape) - group_reduction_axes = list(range(len(group_axes))) - mean, variance = nn.moments(inputs, group_reduction_axes[2:], + group_reduction_axes = list(range(len(group_shape))) + mean, variance = nn.moments(reshaped_inputs, group_reduction_axes[2:], keep_dims=True) - inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon)) + inputs = (reshaped_inputs - mean) / (K.sqrt(variance + self.epsilon)) outputs = K.reshape(inputs, group_shape) if self.scale or self.center: - broadcast_shape=self._create_broadcast_shape(input_shape) - # In this case we must explicitly broadcast all parameters. 
- if self.scale: - broadcast_gamma = K.reshape(self.gamma, broadcast_shape) - outputs = outputs * broadcast_gamma - - if self.center: - broadcast_beta = K.reshape(self.beta, broadcast_shape) - outputs = outputs + broadcast_beta + outputs = self._apply_scale_or_center(outputs,input_shape) # finally we reshape the output back to the input shape outputs = K.reshape(outputs, tensor_input_shape) From 540492ecac0d59efcf518fdc691b291b178b8177 Mon Sep 17 00:00:00 2001 From: smokrow Date: Fri, 1 Mar 2019 20:05:09 +0100 Subject: [PATCH 16/26] fixed BUILD file --- tensorflow_addons/layers/BUILD | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow_addons/layers/BUILD b/tensorflow_addons/layers/BUILD index e2b6b4220e..8d3fea362b 100644 --- a/tensorflow_addons/layers/BUILD +++ b/tensorflow_addons/layers/BUILD @@ -11,11 +11,11 @@ py_library( "python/normalizations.py", "python/poincare.py", "python/wrappers.py" - ]), + ], srcs_version = "PY2AND3", deps = [ "//tensorflow_addons/utils:utils_py", - ], + ] ) py_test( @@ -28,7 +28,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":layers_py", - ], + ] ) py_test( @@ -41,7 +41,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":layers_py", - ], + ] ) py_test( @@ -54,5 +54,5 @@ py_test( srcs_version = "PY2AND3", deps = [ ":layers_py", - ], + ] ) From f980aa5ed58325233c81ba911afaf6a32ec2ddff Mon Sep 17 00:00:00 2001 From: smokrow Date: Fri, 1 Mar 2019 20:06:07 +0100 Subject: [PATCH 17/26] implemented batch_normalization from tf nn --- .../layers/python/normalizations.py | 147 ++++++++++-------- 1 file changed, 85 insertions(+), 62 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 0ac9dbfa37..4c90352eb7 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -104,6 +104,7 @@ def __init__(self, self.beta_constraint = constraints.get(beta_constraint) self.gamma_constraint = constraints.get(gamma_constraint) + def build(self, input_shape): self._check_if_input_shape_is_None(input_shape) @@ -116,6 +117,83 @@ def build(self, input_shape): self.built = True super(GroupNormalization, self).build(input_shape) + + def call(self, inputs): + + input_shape = K.int_shape(inputs) + tensor_input_shape = K.shape(inputs) + + reshaped_inputs, group_shape=self._reshape_into_groups(inputs,input_shape,tensor_input_shape) + + normalized_inputs = self._apply_normalization(reshaped_inputs, input_shape) + + outputs = K.reshape(normalized_inputs, tensor_input_shape) + + return outputs + + + def get_config(self): + config = { + 'groups': self.groups, + 'axis': self.axis, + 'epsilon': self.epsilon, + 'center': self.center, + 'scale': self.scale, + 'beta_initializer': initializers.serialize(self.beta_initializer), + 'gamma_initializer': initializers.serialize(self.gamma_initializer), + 'beta_regularizer': regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), + 'beta_constraint': constraints.serialize(self.beta_constraint), + 'gamma_constraint': constraints.serialize(self.gamma_constraint) + } + base_config = super(GroupNormalization, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + + def compute_output_shape(self, input_shape): + return input_shape + + + def _reshape_into_groups(self,inputs,input_shape,tensor_input_shape): + + group_shape = [tensor_input_shape[i] for i in range(len(input_shape))] + 
group_shape[self.axis] = input_shape[self.axis] // self.groups + group_shape.insert(1, self.groups) + group_shape = K.stack(group_shape) + reshaped_inputs = K.reshape(inputs, group_shape) + return reshaped_inputs, group_shape + + + def _apply_normalization(self, reshaped_inputs , input_shape): + + group_shape = K.int_shape(reshaped_inputs) + group_reduction_axes = list(range(len(group_shape))) + # Remember the ordering of the tensor is [batch, group , steps]. Jump the first 2 to calculate the variance and the mean + mean, variance = nn.moments(reshaped_inputs, group_reduction_axes[2:], + keep_dims=True) + + gamma,beta= self._get_reshaped_weights(input_shape) + normalized_inputs= nn.batch_normalization(reshaped_inputs, + mean = mean, + variance = variance, + scale = gamma, + offset = beta, + variance_epsilon = self.epsilon) + return normalized_inputs + + + def _get_reshaped_weights(self, input_shape): + broadcast_shape=self._create_broadcast_shape(input_shape) + gamma=None + beta=None + if self.scale: + gamma = K.reshape(self.gamma, broadcast_shape) + + if self.center: + beta = K.reshape(self.beta, broadcast_shape) + return gamma, beta + + def _check_if_input_shape_is_None(self, input_shape): dim = input_shape[self.axis] if dim is None: @@ -189,68 +267,6 @@ def _create_broadcast_shape(self,input_shape): return broadcast_shape - def _reshape_into_groups(self,input_shape): - - group_shape = [tensor_input_shape[i] for i in range(len(input_shape))] - group_shape[self.axis] = input_shape[self.axis] // self.groups - group_shape.insert(1, self.groups) - group_shape = K.stack(group_shape) - reshaped_inputs = K.reshape(inputs, group_shape) - return reshaped_inputs, group_shape - - def _apply_scale_or_center(self,inputs, input_shape): - broadcast_shape=self._create_broadcast_shape(input_shape) - if self.scale: - broadcast_gamma = K.reshape(self.gamma, broadcast_shape) - outputs = outputs * broadcast_gamma - - if self.center: - broadcast_beta = K.reshape(self.beta, broadcast_shape) - outputs = outputs + broadcast_beta - return outputs - - def call(self, inputs): - - input_shape = K.int_shape(inputs) - tensor_input_shape = K.shape(inputs) - - reshaped_inputs, group_shape=self._reshape_into_groups(input_shape) - - group_reduction_axes = list(range(len(group_shape))) - mean, variance = nn.moments(reshaped_inputs, group_reduction_axes[2:], - keep_dims=True) - inputs = (reshaped_inputs - mean) / (K.sqrt(variance + self.epsilon)) - - outputs = K.reshape(inputs, group_shape) - - if self.scale or self.center: - outputs = self._apply_scale_or_center(outputs,input_shape) - - # finally we reshape the output back to the input shape - outputs = K.reshape(outputs, tensor_input_shape) - - return outputs - - def get_config(self): - config = { - 'groups': self.groups, - 'axis': self.axis, - 'epsilon': self.epsilon, - 'center': self.center, - 'scale': self.scale, - 'beta_initializer': initializers.serialize(self.beta_initializer), - 'gamma_initializer': initializers.serialize(self.gamma_initializer), - 'beta_regularizer': regularizers.serialize(self.beta_regularizer), - 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), - 'beta_constraint': constraints.serialize(self.beta_constraint), - 'gamma_constraint': constraints.serialize(self.gamma_constraint) - } - base_config = super(GroupNormalization, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - class LayerNormalization(GroupNormalization): """Layer 
normalization layer. @@ -292,6 +308,8 @@ class LayerNormalization(GroupNormalization): - [Layer Normalization](https://arxiv.org/abs/1607.06450) """ def __init__(self,**kwargs): + if "groups" in kwargs: + tf.logging.warning("The given value for groups will be overwritten.") kwargs["groups"]=1 super(LayerNormalization,self).__init__(**kwargs) @@ -336,5 +354,10 @@ class InstanceNormalization(GroupNormalization): - [Layer Normalization](https://arxiv.org/abs/1607.06450) """ def __init__(self,**kwargs): + + if "groups" in kwargs: + tf.logging.warning("The given value for groups will be overwritten.") + kwargs["groups"]=-1 super(InstanceNormalization,self).__init__(**kwargs) + From 576961d2027d980a32a06fac72a117e7951f0084 Mon Sep 17 00:00:00 2001 From: smokrow Date: Mon, 4 Mar 2019 11:44:22 +0100 Subject: [PATCH 18/26] added normalization and reshape test --- .../layers/python/normalizations.py | 6 +- .../layers/python/normalizations_test.py | 179 +++++++++++++----- 2 files changed, 139 insertions(+), 46 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 4c90352eb7..72b45f9f25 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -15,6 +15,7 @@ # Orginal implementation from keras_contrib/layer/normalization +import tensorflow as tf from tensorflow.keras import backend as K from tensorflow.keras import constraints from tensorflow.keras import initializers @@ -78,7 +79,7 @@ class GroupNormalization(Layer): """ def __init__(self, - groups=32, + groups=2, axis=-1, epsilon=1e-5, center=True, @@ -165,7 +166,7 @@ def _reshape_into_groups(self,inputs,input_shape,tensor_input_shape): def _apply_normalization(self, reshaped_inputs , input_shape): - + group_shape = K.int_shape(reshaped_inputs) group_reduction_axes = list(range(len(group_shape))) # Remember the ordering of the tensor is [batch, group , steps]. Jump the first 2 to calculate the variance and the mean @@ -360,4 +361,3 @@ def __init__(self,**kwargs): kwargs["groups"]=-1 super(InstanceNormalization,self).__init__(**kwargs) - diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index a68f4c8ef1..3bc472ae91 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -13,36 +13,89 @@ # limitations under the License. 
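
One property the new test_apply_normalization case below relies on: a group
whose entries are all equal is mapped to zeros, because x - mean(x) vanishes
before the 1 / sqrt(var + eps) scaling is applied. A quick NumPy check,
illustrative only:

    import numpy as np
    group = np.array([2.0, 2.0])
    print((group - group.mean()) / np.sqrt(group.var() + 1e-5))   # [0. 0.]
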
# ============================================================================= -from tensorflow_addons.layers.python.normalizations import GroupNormalization,LayerNormalization,InstanceNormalization import numpy as np import scipy as scipy import tensorflow as tf from tensorflow import keras as keras +from tensorflow_addons.layers.python.normalizations import GroupNormalization +from tensorflow_addons.layers.python.normalizations import InstanceNormalization +from tensorflow_addons.layers.python.normalizations import LayerNormalization +from tensorflow.python.framework import test_util as tf_test_util +from tensorflow.python.keras import keras_parameterized +from tensorflow.python.keras import testing_utils +from tensorflow.python.keras.layers import normalization from tensorflow.python.training.rmsprop import RMSPropOptimizer from tensorflow.python.platform import test -from tensorflow.python.framework import test_util as tf_test_util +from tensorflow.python.training import gradient_descent -def create_and_fit_Sequential_model(layer,shape): - #Helperfunction for quick evaluation - model=keras.models.Sequential() +def create_and_fit_Sequential_model(layer, shape): + # Helperfunction for quick evaluation + model = keras.models.Sequential() model.add(layer) model.add(keras.layers.Dense(32)) model.add(keras.layers.Dense(1)) - model.compile(optimizer=RMSPropOptimizer(0.01),loss="categorical_crossentropy") - layer_shape=(10,)+shape - input_batch=np.random.rand(*layer_shape) - output_batch=np.random.rand(*(10,1)) - model.fit(x=input_batch,y=output_batch, epochs=1, batch_size=1) + model.compile(optimizer=RMSPropOptimizer(0.01), + loss="categorical_crossentropy") + layer_shape = (10,) + shape + input_batch = np.random.rand(*layer_shape) + output_batch = np.random.rand(*(10, 1)) + model.fit(x=input_batch, y=output_batch, epochs=1, batch_size=1) return model class normalization_test(test.TestCase): +# ------------Tests to ensure proper inheritance. 
If these suceed you can test for Instance norm and Layernorm by setting Groupnorm groups = -1 or 1 + def test_inheritance(self): + self.assertTrue(issubclass(LayerNormalization, GroupNormalization)) + self.assertTrue(issubclass(InstanceNormalization, GroupNormalization)) + self.assertTrue(LayerNormalization.build==GroupNormalization.build) + self.assertTrue(InstanceNormalization.build==GroupNormalization.build) + self.assertTrue(LayerNormalization.call==GroupNormalization.call) + self.assertTrue(InstanceNormalization.call==GroupNormalization.call) + + + def test_groups_after_init(self): + layers=InstanceNormalization() + self.assertTrue(layers.groups==-1) + layers=LayerNormalization() + self.assertTrue(layers.groups==1) +# ----------------------------------------------------------------------------------------------------------------------------------------- + + def test_reshape(self): + def run_reshape_test(axis, group, input_shape, expected_shape): + + group_layer=GroupNormalization(groups=group,axis=axis) + group_layer._set_number_of_groups_for_instance_norm(input_shape) + + inputs=np.ones(input_shape) + tensor_input_shape=tf.convert_to_tensor(input_shape) + reshaped_inputs, group_shape=group_layer._reshape_into_groups(inputs,(10,10,10),tensor_input_shape) + for i in range(len(expected_shape)): + self.assertEqual(int(group_shape[i]),expected_shape[i]) + + input_shape=(10,10,10) + expected_shape=[10,5,10,2] + run_reshape_test(2,5,input_shape,expected_shape) + + input_shape=(10,10,10) + expected_shape=[10,2,5,10] + run_reshape_test(1,2,input_shape,expected_shape) + + input_shape=(10,10,10) + expected_shape=[10,10,1,10] + run_reshape_test(1,-1,input_shape,expected_shape) + + input_shape=(10,10,10) + expected_shape=[10,1,10,10] + run_reshape_test(1,1,input_shape,expected_shape) + + @tf_test_util.run_in_graph_and_eager_modes def test_weights(self): - #Check if weights get initialized - layer = GroupNormalization(groups=1,scale=False, center=False) + # Check if weights get initialized + layer = GroupNormalization(groups=1, scale=False, center=False) layer.build((None, 3, 4)) self.assertEqual(len(layer.trainable_weights), 0) self.assertEqual(len(layer.weights), 0) @@ -54,66 +107,106 @@ def test_weights(self): layer = InstanceNormalization() layer.build((None, 3, 4)) - self.assertEqual(len(layer.trainable_weights),2) - self.assertEqual(len(layer.weights),2) + self.assertEqual(len(layer.trainable_weights), 2) + self.assertEqual(len(layer.weights), 2) + + def test_apply_normalization(self): + + input_shape = (1,4) + expected_shape= (1,2,2) + reshaped_inputs= tf.constant([[[2.0,2.0],[3.0,3.0]]]) + layer=GroupNormalization(groups=2,axis=1,scale=False, center= False) + normalized_input=layer._apply_normalization(reshaped_inputs, input_shape) + self.assertTrue(tf.reduce_all(tf.equal(normalized_input,tf.constant([[[0.0,0.0],[0.0,0.0]]])))) + + + @tf_test_util.run_in_graph_and_eager_modes def test_groupnorm_flat(self): - #Check basic usage of groupnorm_flat + # Check basic usage of groupnorm_flat # Testing for 1 == LayerNorm, 16 == GroupNorm, -1 == InstanceNorm - groups=[-1,16,1] - shape=(64,) + + groups = [-1, 16, 1] + shape = (64,) for i in groups: - model=create_and_fit_Sequential_model(GroupNormalization(groups=i),shape) + model = create_and_fit_Sequential_model( + GroupNormalization(groups=i), shape) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) + @tf_test_util.run_in_graph_and_eager_modes def test_layernorm_flat(self): # Check basic usage of 
layernorm - model=create_and_fit_Sequential_model(LayerNormalization(),(64,)) - self.assertTrue(hasattr(model.layers[0],'gamma')) - self.assertTrue(hasattr(model.layers[0],'beta')) + model = create_and_fit_Sequential_model(LayerNormalization(), (64,)) + self.assertTrue(hasattr(model.layers[0], 'gamma')) + self.assertTrue(hasattr(model.layers[0], 'beta')) + + @tf_test_util.run_in_graph_and_eager_modes def test_instancenorm_flat(self): # Check basic usage of instancenorm - model=create_and_fit_Sequential_model(InstanceNormalization(),(64,)) - self.assertTrue(hasattr(model.layers[0],'gamma')) - self.assertTrue(hasattr(model.layers[0],'beta')) + + model = create_and_fit_Sequential_model(InstanceNormalization(), (64,)) + self.assertTrue(hasattr(model.layers[0], 'gamma')) + self.assertTrue(hasattr(model.layers[0], 'beta')) + @tf_test_util.run_in_graph_and_eager_modes def test_initializer(self): # Check if the initializer for gamma and beta is working correctly - model=create_and_fit_Sequential_model(GroupNormalization(groups=32, - beta_initializer='random_normal', - beta_constraint='NonNeg', - gamma_initializer='random_normal', - gamma_constraint='NonNeg'), - (64,)) - - weights=np.array(model.layers[0].get_weights()) - negativ=weights[weights<0.0] - print("------------------------------------------------------") - print(negativ) - self.assertTrue(len(negativ)==0) - + layer=GroupNormalization(groups=32, + beta_initializer='random_normal', + beta_constraint='NonNeg', + gamma_initializer='random_normal', + gamma_constraint='NonNeg') + + model = create_and_fit_Sequential_model(layer,(64,)) + + weights = np.array(model.layers[0].get_weights()) + negativ = weights[weights < 0.0] + self.assertTrue(len(negativ) == 0) + + + @tf_test_util.run_in_graph_and_eager_modes + def test_regularizations(self): + + layer = GroupNormalization( + gamma_regularizer='l1', + beta_regularizer='l1', + groups=4, + axis=2) + layer.build((None, 4, 4)) + self.assertEqual(len(layer.losses), 2) + max_norm = keras.constraints.max_norm + layer = GroupNormalization( + gamma_constraint=max_norm, + beta_constraint=max_norm) + layer.build((None, 3, 4)) + self.assertEqual(layer.gamma.constraint, max_norm) + self.assertEqual(layer.beta.constraint, max_norm) + + @tf_test_util.run_in_graph_and_eager_modes def test_groupnorm_conv(self): # Check if Axis is working for CONV nets # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm - groups=[-1,5,1] + + groups = [-1, 5, 1] for i in groups: model = keras.models.Sequential() - model.add(GroupNormalization(axis=1,groups=i,input_shape=(20,20,3))) + model.add(GroupNormalization( + axis=1, groups=i, input_shape=(20, 20, 3))) model.add(keras.layers.Conv2D(5, (1, 1), padding='same')) model.add(keras.layers.Flatten()) - model.add(keras.layers.Dense(1,activation='softmax')) + model.add(keras.layers.Dense(1, activation='softmax')) model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') - x=np.random.randint(1000,size=(10,20, 20, 3)) - y=np.random.randint(1000,size=(10,1)) - a=model.fit(x=x,y=y,epochs=1) + x = np.random.randint(1000, size=(10, 20, 20, 3)) + y = np.random.randint(1000, size=(10, 1)) + a = model.fit(x=x, y=y, epochs=1) self.assertTrue(hasattr(model.layers[0], 'gamma')) From 918eeb7a1af75b35ce0957db74a3439d7c0444ea Mon Sep 17 00:00:00 2001 From: smokrow Date: Mon, 4 Mar 2019 21:16:15 +0100 Subject: [PATCH 19/26] added axis check --- tensorflow_addons/layers/python/normalizations.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git 
a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 72b45f9f25..e49df0ac16 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -104,7 +104,7 @@ def __init__(self, self.gamma_regularizer = regularizers.get(gamma_regularizer) self.beta_constraint = constraints.get(beta_constraint) self.gamma_constraint = constraints.get(gamma_constraint) - + self._check_axis() def build(self, input_shape): @@ -224,7 +224,10 @@ def _check_size_of_dimensions(self,input_shape): 'multiple of the number of channels (' + str(dim) + ').') + def _check_axis(self): + if self.axis==0: + raise ValueError("You are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead") def _create_input_spec(self,input_shape): dim=input_shape[self.axis] From d2c1afdf51f17e554c6911a2553988294292c9c9 Mon Sep 17 00:00:00 2001 From: smokrow Date: Mon, 4 Mar 2019 21:16:40 +0100 Subject: [PATCH 20/26] added manual layer test --- .../layers/python/normalizations_test.py | 101 ++++++++++++++---- 1 file changed, 83 insertions(+), 18 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index 3bc472ae91..841c99595f 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -29,20 +29,7 @@ from tensorflow.python.training import gradient_descent -def create_and_fit_Sequential_model(layer, shape): - # Helperfunction for quick evaluation - model = keras.models.Sequential() - model.add(layer) - model.add(keras.layers.Dense(32)) - model.add(keras.layers.Dense(1)) - - model.compile(optimizer=RMSPropOptimizer(0.01), - loss="categorical_crossentropy") - layer_shape = (10,) + shape - input_batch = np.random.rand(*layer_shape) - output_batch = np.random.rand(*(10, 1)) - model.fit(x=input_batch, y=output_batch, epochs=1, batch_size=1) - return model + class normalization_test(test.TestCase): @@ -92,6 +79,79 @@ def run_reshape_test(axis, group, input_shape, expected_shape): expected_shape=[10,1,10,10] run_reshape_test(1,1,input_shape,expected_shape) + def test_call_function(self): + + self._test_specific_layer(tf.random.normal((10,10,10)),1,1,False,True) + + def _test_specific_layer(self,inputs, axis, groups, center, scale): + + input_shape=inputs.shape + + layer=GroupNormalization(axis=axis,groups=groups,center=center,scale=scale) + + model= keras.models.Sequential() + model.add(layer) + + outputs=model.predict(inputs) + self.assertFalse(np.isnan(outputs).any()) + + if groups is -1: + groups=input_shape[axis] + np_inputs=inputs.numpy() + reshaped_dims=list(np_inputs.shape) + reshaped_dims[axis]=reshaped_dims[axis]//groups + reshaped_dims.insert(1,groups) + #reshaped_dims=np.array([reshaped_dims[0],groups,i for i in reshaped_dims[1:]]) + reshaped_inputs=np.reshape(np_inputs,tuple(reshaped_dims)) + mean = np.mean(reshaped_inputs, axis=tuple(range(2,len(reshaped_dims))),keepdims=True) + variance = np.var(reshaped_inputs,axis=tuple(range(2,len(reshaped_dims))),keepdims=True) + + gamma,beta=layer._get_reshaped_weights(input_shape) + print("GAMMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") + print(gamma.shape) + print(reshaped_dims) + print(np_inputs.shape) + print(gamma) + print(beta) + gamma=np.repeat(gamma, input_shape[0],axis=0) + 
print("GAMMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") + print(gamma.shape) + if gamma is None: + gamma=1.0 + if beta is None: + beta=0.0 + output_test=[] + + a=np_inputs-mean + output_test=((gamma*a)*(1/np.sqrt(variance+1e-5))+beta) + output_test=np.array(output_test) + print("OOOOOOUTPUUUUUUT") + print(output_test.shape) + output_test=np.reshape(output_test,input_shape.as_list()) + output_test=output_test.flatten() + + + outputs_tf= outputs.flatten() + for i in range(len(output_test)): + + self.assertAlmostEqual(output_test[i],outputs_tf[i],places=5) + return outputs + + def _create_and_fit_Sequential_model(self,layer, shape): + # Helperfunction for quick evaluation + model = keras.models.Sequential() + model.add(layer) + model.add(keras.layers.Dense(32)) + model.add(keras.layers.Dense(1)) + + model.compile(optimizer=RMSPropOptimizer(0.01), + loss="categorical_crossentropy") + layer_shape = (10,) + shape + input_batch = np.random.rand(*layer_shape) + output_batch = np.random.rand(*(10, 1)) + model.fit(x=input_batch, y=output_batch, epochs=1, batch_size=1) + return model + @tf_test_util.run_in_graph_and_eager_modes def test_weights(self): # Check if weights get initialized @@ -119,9 +179,14 @@ def test_apply_normalization(self): normalized_input=layer._apply_normalization(reshaped_inputs, input_shape) self.assertTrue(tf.reduce_all(tf.equal(normalized_input,tf.constant([[[0.0,0.0],[0.0,0.0]]])))) + def test_axis_error(self): + + with self.assertRaises(ValueError): + GroupNormalization(axis=0) + @tf_test_util.run_in_graph_and_eager_modes def test_groupnorm_flat(self): # Check basic usage of groupnorm_flat @@ -130,7 +195,7 @@ def test_groupnorm_flat(self): groups = [-1, 16, 1] shape = (64,) for i in groups: - model = create_and_fit_Sequential_model( + model = self._create_and_fit_Sequential_model( GroupNormalization(groups=i), shape) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) @@ -140,7 +205,7 @@ def test_groupnorm_flat(self): def test_layernorm_flat(self): # Check basic usage of layernorm - model = create_and_fit_Sequential_model(LayerNormalization(), (64,)) + model = self._create_and_fit_Sequential_model(LayerNormalization(), (64,)) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) @@ -149,7 +214,7 @@ def test_layernorm_flat(self): def test_instancenorm_flat(self): # Check basic usage of instancenorm - model = create_and_fit_Sequential_model(InstanceNormalization(), (64,)) + model = self._create_and_fit_Sequential_model(InstanceNormalization(), (64,)) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) @@ -164,7 +229,7 @@ def test_initializer(self): gamma_initializer='random_normal', gamma_constraint='NonNeg') - model = create_and_fit_Sequential_model(layer,(64,)) + model = self._create_and_fit_Sequential_model(layer,(64,)) weights = np.array(model.layers[0].get_weights()) negativ = weights[weights < 0.0] From 4ebd90757e848c6d8020129e21cfe978d3ac0bd9 Mon Sep 17 00:00:00 2001 From: smokrow Date: Tue, 5 Mar 2019 22:29:16 +0100 Subject: [PATCH 21/26] added tests to check normalization with numpy --- .../layers/python/normalizations_test.py | 238 +++++++++--------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index 841c99595f..3b83d70fb6 100644 
--- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -29,115 +29,119 @@ from tensorflow.python.training import gradient_descent - - - class normalization_test(test.TestCase): -# ------------Tests to ensure proper inheritance. If these suceed you can test for Instance norm and Layernorm by setting Groupnorm groups = -1 or 1 + # ------------Tests to ensure proper inheritance. If these suceed you can test for Instance norm and Layernorm by setting Groupnorm groups = -1 or 1 def test_inheritance(self): self.assertTrue(issubclass(LayerNormalization, GroupNormalization)) self.assertTrue(issubclass(InstanceNormalization, GroupNormalization)) - self.assertTrue(LayerNormalization.build==GroupNormalization.build) - self.assertTrue(InstanceNormalization.build==GroupNormalization.build) - self.assertTrue(LayerNormalization.call==GroupNormalization.call) - self.assertTrue(InstanceNormalization.call==GroupNormalization.call) - + self.assertTrue(LayerNormalization.build == GroupNormalization.build) + self.assertTrue(InstanceNormalization.build == + GroupNormalization.build) + self.assertTrue(LayerNormalization.call == GroupNormalization.call) + self.assertTrue(InstanceNormalization.call == GroupNormalization.call) def test_groups_after_init(self): - layers=InstanceNormalization() - self.assertTrue(layers.groups==-1) - layers=LayerNormalization() - self.assertTrue(layers.groups==1) + layers = InstanceNormalization() + self.assertTrue(layers.groups == -1) + layers = LayerNormalization() + self.assertTrue(layers.groups == 1) # ----------------------------------------------------------------------------------------------------------------------------------------- def test_reshape(self): def run_reshape_test(axis, group, input_shape, expected_shape): - group_layer=GroupNormalization(groups=group,axis=axis) + group_layer = GroupNormalization(groups=group, axis=axis) group_layer._set_number_of_groups_for_instance_norm(input_shape) - inputs=np.ones(input_shape) - tensor_input_shape=tf.convert_to_tensor(input_shape) - reshaped_inputs, group_shape=group_layer._reshape_into_groups(inputs,(10,10,10),tensor_input_shape) + inputs = np.ones(input_shape) + tensor_input_shape = tf.convert_to_tensor(input_shape) + reshaped_inputs, group_shape = group_layer._reshape_into_groups( + inputs, (10, 10, 10), tensor_input_shape) for i in range(len(expected_shape)): - self.assertEqual(int(group_shape[i]),expected_shape[i]) - - input_shape=(10,10,10) - expected_shape=[10,5,10,2] - run_reshape_test(2,5,input_shape,expected_shape) - - input_shape=(10,10,10) - expected_shape=[10,2,5,10] - run_reshape_test(1,2,input_shape,expected_shape) - - input_shape=(10,10,10) - expected_shape=[10,10,1,10] - run_reshape_test(1,-1,input_shape,expected_shape) - - input_shape=(10,10,10) - expected_shape=[10,1,10,10] - run_reshape_test(1,1,input_shape,expected_shape) - - def test_call_function(self): - - self._test_specific_layer(tf.random.normal((10,10,10)),1,1,False,True) - - def _test_specific_layer(self,inputs, axis, groups, center, scale): - - input_shape=inputs.shape - - layer=GroupNormalization(axis=axis,groups=groups,center=center,scale=scale) - - model= keras.models.Sequential() + self.assertEqual(int(group_shape[i]), expected_shape[i]) + + input_shape = (10, 10, 10) + expected_shape = [10, 5, 10, 2] + run_reshape_test(2, 5, input_shape, expected_shape) + + input_shape = (10, 10, 10) + expected_shape = [10, 2, 5, 10] + run_reshape_test(1, 2, input_shape, expected_shape) + 
+ input_shape = (10, 10, 10) + expected_shape = [10, 10, 1, 10] + run_reshape_test(1, -1, input_shape, expected_shape) + + input_shape = (10, 10, 10) + expected_shape = [10, 1, 10, 10] + run_reshape_test(1, 1, input_shape, expected_shape) + + def test_feature_input(self): + shape = (10, 100) + for center in [True, False]: + for scale in [True, False]: + for groups in [-1, 1, 2, 5]: + self._test_random_shape_on_all_axis_except_batch( + shape, groups, center, scale) + + def test_picture_input(self): + shape = (10, 30, 30, 3) + for center in [True, False]: + for scale in [True, False]: + for groups in [-1, 1, 3]: + self._test_random_shape_on_all_axis_except_batch( + shape, groups, center, scale) + + def _test_random_shape_on_all_axis_except_batch(self, shape, groups, center, scale): + inputs = tf.random.normal((shape)) + for axis in range(1, len(shape)): + self._test_specific_layer(inputs, axis, groups, center, scale) + + def _test_specific_layer(self, inputs, axis, groups, center, scale): + + input_shape = inputs.shape + + # Get Output from Keras model + layer = GroupNormalization( + axis=axis, groups=groups, center=center, scale=scale) + model = keras.models.Sequential() model.add(layer) - - outputs=model.predict(inputs) + outputs = model.predict(inputs) self.assertFalse(np.isnan(outputs).any()) + # Create shapes if groups is -1: - groups=input_shape[axis] - np_inputs=inputs.numpy() - reshaped_dims=list(np_inputs.shape) - reshaped_dims[axis]=reshaped_dims[axis]//groups - reshaped_dims.insert(1,groups) - #reshaped_dims=np.array([reshaped_dims[0],groups,i for i in reshaped_dims[1:]]) - reshaped_inputs=np.reshape(np_inputs,tuple(reshaped_dims)) - mean = np.mean(reshaped_inputs, axis=tuple(range(2,len(reshaped_dims))),keepdims=True) - variance = np.var(reshaped_inputs,axis=tuple(range(2,len(reshaped_dims))),keepdims=True) - - gamma,beta=layer._get_reshaped_weights(input_shape) - print("GAMMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") - print(gamma.shape) - print(reshaped_dims) - print(np_inputs.shape) - print(gamma) - print(beta) - gamma=np.repeat(gamma, input_shape[0],axis=0) - print("GAMMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA") - print(gamma.shape) + groups = input_shape[axis] + np_inputs = inputs.numpy() + reshaped_dims = list(np_inputs.shape) + reshaped_dims[axis] = reshaped_dims[axis] // groups + reshaped_dims.insert(1, groups) + reshaped_inputs = np.reshape(np_inputs, tuple(reshaped_dims)) + + # Calculate mean and variance + mean = np.mean(reshaped_inputs, axis=tuple( + range(2, len(reshaped_dims))), keepdims=True) + variance = np.var(reshaped_inputs, axis=tuple( + range(2, len(reshaped_dims))), keepdims=True) + + # Get gamma and beta initalized by layer + gamma, beta = layer._get_reshaped_weights(input_shape) if gamma is None: - gamma=1.0 + gamma = 1.0 if beta is None: - beta=0.0 - output_test=[] - - a=np_inputs-mean - output_test=((gamma*a)*(1/np.sqrt(variance+1e-5))+beta) - output_test=np.array(output_test) - print("OOOOOOUTPUUUUUUT") - print(output_test.shape) - output_test=np.reshape(output_test,input_shape.as_list()) - output_test=output_test.flatten() - - - outputs_tf= outputs.flatten() - for i in range(len(output_test)): - - self.assertAlmostEqual(output_test[i],outputs_tf[i],places=5) - return outputs - - def _create_and_fit_Sequential_model(self,layer, shape): + beta = 0.0 + + # Get ouput from Numpy + zeroed = reshaped_inputs - mean + rsqrt = 1 / 
np.sqrt(variance + 1e-5) + output_test = gamma * zeroed * rsqrt + beta + + # compare outputs + output_test = np.reshape(output_test, input_shape.as_list()) + self.assertAlmostEqual(np.mean(output_test - outputs), 0, places=7) + + def _create_and_fit_Sequential_model(self, layer, shape): # Helperfunction for quick evaluation model = keras.models.Sequential() model.add(layer) @@ -154,7 +158,7 @@ def _create_and_fit_Sequential_model(self,layer, shape): @tf_test_util.run_in_graph_and_eager_modes def test_weights(self): - # Check if weights get initialized + # Check if weights get initialized correctly layer = GroupNormalization(groups=1, scale=False, center=False) layer.build((None, 3, 4)) self.assertEqual(len(layer.trainable_weights), 0) @@ -169,24 +173,23 @@ def test_weights(self): layer.build((None, 3, 4)) self.assertEqual(len(layer.trainable_weights), 2) self.assertEqual(len(layer.weights), 2) - + def test_apply_normalization(self): - - input_shape = (1,4) - expected_shape= (1,2,2) - reshaped_inputs= tf.constant([[[2.0,2.0],[3.0,3.0]]]) - layer=GroupNormalization(groups=2,axis=1,scale=False, center= False) - normalized_input=layer._apply_normalization(reshaped_inputs, input_shape) - self.assertTrue(tf.reduce_all(tf.equal(normalized_input,tf.constant([[[0.0,0.0],[0.0,0.0]]])))) + + input_shape = (1, 4) + expected_shape = (1, 2, 2) + reshaped_inputs = tf.constant([[[2.0, 2.0], [3.0, 3.0]]]) + layer = GroupNormalization(groups=2, axis=1, scale=False, center=False) + normalized_input = layer._apply_normalization( + reshaped_inputs, input_shape) + self.assertTrue(tf.reduce_all( + tf.equal(normalized_input, tf.constant([[[0.0, 0.0], [0.0, 0.0]]])))) def test_axis_error(self): with self.assertRaises(ValueError): GroupNormalization(axis=0) - - - @tf_test_util.run_in_graph_and_eager_modes def test_groupnorm_flat(self): # Check basic usage of groupnorm_flat @@ -200,60 +203,57 @@ def test_groupnorm_flat(self): self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) - @tf_test_util.run_in_graph_and_eager_modes def test_layernorm_flat(self): # Check basic usage of layernorm - model = self._create_and_fit_Sequential_model(LayerNormalization(), (64,)) + model = self._create_and_fit_Sequential_model( + LayerNormalization(), (64,)) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) - @tf_test_util.run_in_graph_and_eager_modes def test_instancenorm_flat(self): # Check basic usage of instancenorm - model = self._create_and_fit_Sequential_model(InstanceNormalization(), (64,)) + model = self._create_and_fit_Sequential_model( + InstanceNormalization(), (64,)) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) - @tf_test_util.run_in_graph_and_eager_modes def test_initializer(self): # Check if the initializer for gamma and beta is working correctly - layer=GroupNormalization(groups=32, - beta_initializer='random_normal', - beta_constraint='NonNeg', - gamma_initializer='random_normal', - gamma_constraint='NonNeg') + layer = GroupNormalization(groups=32, + beta_initializer='random_normal', + beta_constraint='NonNeg', + gamma_initializer='random_normal', + gamma_constraint='NonNeg') - model = self._create_and_fit_Sequential_model(layer,(64,)) + model = self._create_and_fit_Sequential_model(layer, (64,)) weights = np.array(model.layers[0].get_weights()) negativ = weights[weights < 0.0] self.assertTrue(len(negativ) == 0) - @tf_test_util.run_in_graph_and_eager_modes def 
test_regularizations(self): layer = GroupNormalization( - gamma_regularizer='l1', - beta_regularizer='l1', - groups=4, - axis=2) + gamma_regularizer='l1', + beta_regularizer='l1', + groups=4, + axis=2) layer.build((None, 4, 4)) self.assertEqual(len(layer.losses), 2) max_norm = keras.constraints.max_norm layer = GroupNormalization( - gamma_constraint=max_norm, - beta_constraint=max_norm) + gamma_constraint=max_norm, + beta_constraint=max_norm) layer.build((None, 3, 4)) self.assertEqual(layer.gamma.constraint, max_norm) self.assertEqual(layer.beta.constraint, max_norm) - @tf_test_util.run_in_graph_and_eager_modes def test_groupnorm_conv(self): From 37244c4d8ed862659a40625f4ab24f66fb9d9a1d Mon Sep 17 00:00:00 2001 From: smokrow Date: Tue, 5 Mar 2019 23:10:42 +0100 Subject: [PATCH 22/26] Included some comments from @ppwwyyxx --- tensorflow_addons/layers/python/normalizations.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index e49df0ac16..57325960fd 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -30,8 +30,9 @@ class GroupNormalization(Layer): Group Normalization divides the channels into groups and computes within each group the mean and variance for normalization. - Group Normalization's computation is independent - of batch sizes, and its accuracy is stable in a wide range of batch sizes. + Empirically, its accuracy is more stable than batch norm in a wide + range of small batch sizes, if learning rate is adjusted linearly + with batch sizes. Relation to Layer Normalization: If the number of groups is set to 1, then this operation becomes identical to @@ -276,8 +277,9 @@ class LayerNormalization(GroupNormalization): Layer Normalization is an specific case of ```GroupNormalization```since it normalizes all features of a layer. The Groupsize is 1. - Layer Normalization's computation is independent - of batch sizes, and its accuracy is stable in a wide range of batch sizes. + Empirically, its accuracy is more stable than batch norm in a wide + range of small batch sizes, if learning rate is adjusted linearly + with batch sizes. Arguments axis: Integer, the axis that should be normalized @@ -322,8 +324,9 @@ class InstanceNormalization(GroupNormalization): Instance Normalization is an specific case of ```GroupNormalization```since it normalizes all features of one channel. The Groupsize is equal to the channel size. - Instance Normalization's computation is independent - of batch sizes, and its accuracy is stable in a wide range of batch sizes. + Empirically, its accuracy is more stable than batch norm in a wide + range of small batch sizes, if learning rate is adjusted linearly + with batch sizes. 
Arguments axis: Integer, the axis that should be normalized From 06694664b691b09e78106542d9b2ef1400e76950 Mon Sep 17 00:00:00 2001 From: smokrow Date: Tue, 5 Mar 2019 23:11:41 +0100 Subject: [PATCH 23/26] beautified --- .../layers/python/normalizations.py | 108 +++++++++--------- 1 file changed, 53 insertions(+), 55 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 57325960fd..579fb026ad 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -30,8 +30,8 @@ class GroupNormalization(Layer): Group Normalization divides the channels into groups and computes within each group the mean and variance for normalization. - Empirically, its accuracy is more stable than batch norm in a wide - range of small batch sizes, if learning rate is adjusted linearly + Empirically, its accuracy is more stable than batch norm in a wide + range of small batch sizes, if learning rate is adjusted linearly with batch sizes. Relation to Layer Normalization: @@ -119,21 +119,21 @@ def build(self, input_shape): self.built = True super(GroupNormalization, self).build(input_shape) - def call(self, inputs): input_shape = K.int_shape(inputs) tensor_input_shape = K.shape(inputs) - reshaped_inputs, group_shape=self._reshape_into_groups(inputs,input_shape,tensor_input_shape) + reshaped_inputs, group_shape = self._reshape_into_groups( + inputs, input_shape, tensor_input_shape) - normalized_inputs = self._apply_normalization(reshaped_inputs, input_shape) + normalized_inputs = self._apply_normalization( + reshaped_inputs, input_shape) outputs = K.reshape(normalized_inputs, tensor_input_shape) return outputs - def get_config(self): config = { 'groups': self.groups, @@ -151,12 +151,10 @@ def get_config(self): base_config = super(GroupNormalization, self).get_config() return dict(list(base_config.items()) + list(config.items())) - def compute_output_shape(self, input_shape): return input_shape - - def _reshape_into_groups(self,inputs,input_shape,tensor_input_shape): + def _reshape_into_groups(self, inputs, input_shape, tensor_input_shape): group_shape = [tensor_input_shape[i] for i in range(len(input_shape))] group_shape[self.axis] = input_shape[self.axis] // self.groups @@ -165,8 +163,7 @@ def _reshape_into_groups(self,inputs,input_shape,tensor_input_shape): reshaped_inputs = K.reshape(inputs, group_shape) return reshaped_inputs, group_shape - - def _apply_normalization(self, reshaped_inputs , input_shape): + def _apply_normalization(self, reshaped_inputs, input_shape): group_shape = K.int_shape(reshaped_inputs) group_reduction_axes = list(range(len(group_shape))) @@ -174,20 +171,19 @@ def _apply_normalization(self, reshaped_inputs , input_shape): mean, variance = nn.moments(reshaped_inputs, group_reduction_axes[2:], keep_dims=True) - gamma,beta= self._get_reshaped_weights(input_shape) - normalized_inputs= nn.batch_normalization(reshaped_inputs, - mean = mean, - variance = variance, - scale = gamma, - offset = beta, - variance_epsilon = self.epsilon) + gamma, beta = self._get_reshaped_weights(input_shape) + normalized_inputs = nn.batch_normalization(reshaped_inputs, + mean=mean, + variance=variance, + scale=gamma, + offset=beta, + variance_epsilon=self.epsilon) return normalized_inputs - def _get_reshaped_weights(self, input_shape): - broadcast_shape=self._create_broadcast_shape(input_shape) - gamma=None - beta=None + broadcast_shape = self._create_broadcast_shape(input_shape) + 
gamma = None + beta = None if self.scale: gamma = K.reshape(self.gamma, broadcast_shape) @@ -195,7 +191,6 @@ def _get_reshaped_weights(self, input_shape): beta = K.reshape(self.beta, broadcast_shape) return gamma, beta - def _check_if_input_shape_is_None(self, input_shape): dim = input_shape[self.axis] if dim is None: @@ -204,17 +199,15 @@ def _check_if_input_shape_is_None(self, input_shape): 'but the layer received an input with shape ' + str(input_shape) + '.') - def _set_number_of_groups_for_instance_norm(self, input_shape): - dim=input_shape[self.axis] - - if self.groups==-1: - self.groups=dim + dim = input_shape[self.axis] + if self.groups == -1: + self.groups = dim - def _check_size_of_dimensions(self,input_shape): + def _check_size_of_dimensions(self, input_shape): - dim=input_shape[self.axis] + dim = input_shape[self.axis] if dim < self.groups: raise ValueError('Number of groups (' + str(self.groups) + ') cannot be ' 'more than the number of channels (' + @@ -227,19 +220,20 @@ def _check_size_of_dimensions(self,input_shape): def _check_axis(self): - if self.axis==0: - raise ValueError("You are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead") - def _create_input_spec(self,input_shape): + if self.axis == 0: + raise ValueError( + "You are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead") + + def _create_input_spec(self, input_shape): - dim=input_shape[self.axis] + dim = input_shape[self.axis] self.input_spec = InputSpec(ndim=len(input_shape), axes={self.axis: dim}) + def _add_gamma_weight(self, input_shape): - def _add_gamma_weight(self,input_shape): - - dim=input_shape[self.axis] - shape=(dim,) + dim = input_shape[self.axis] + shape = (dim,) if self.scale: self.gamma = self.add_weight(shape=shape, @@ -250,10 +244,10 @@ def _add_gamma_weight(self,input_shape): else: self.gamma = None - def _add_beta_weight(self,input_shape): + def _add_beta_weight(self, input_shape): - dim=input_shape[self.axis] - shape=(dim,) + dim = input_shape[self.axis] + shape = (dim,) if self.center: self.beta = self.add_weight(shape=shape, @@ -264,8 +258,7 @@ def _add_beta_weight(self,input_shape): else: self.beta = None - - def _create_broadcast_shape(self,input_shape): + def _create_broadcast_shape(self, input_shape): broadcast_shape = [1] * len(input_shape) broadcast_shape[self.axis] = input_shape[self.axis] // self.groups broadcast_shape.insert(1, self.groups) @@ -277,8 +270,8 @@ class LayerNormalization(GroupNormalization): Layer Normalization is an specific case of ```GroupNormalization```since it normalizes all features of a layer. The Groupsize is 1. - Empirically, its accuracy is more stable than batch norm in a wide - range of small batch sizes, if learning rate is adjusted linearly + Empirically, its accuracy is more stable than batch norm in a wide + range of small batch sizes, if learning rate is adjusted linearly with batch sizes. 
Arguments @@ -313,19 +306,22 @@ class LayerNormalization(GroupNormalization): References - [Layer Normalization](https://arxiv.org/abs/1607.06450) """ - def __init__(self,**kwargs): + + def __init__(self, **kwargs): if "groups" in kwargs: - tf.logging.warning("The given value for groups will be overwritten.") - kwargs["groups"]=1 - super(LayerNormalization,self).__init__(**kwargs) + tf.logging.warning( + "The given value for groups will be overwritten.") + kwargs["groups"] = 1 + super(LayerNormalization, self).__init__(**kwargs) + class InstanceNormalization(GroupNormalization): """Instance normalization layer. Instance Normalization is an specific case of ```GroupNormalization```since it normalizes all features of one channel. The Groupsize is equal to the channel size. - Empirically, its accuracy is more stable than batch norm in a wide - range of small batch sizes, if learning rate is adjusted linearly + Empirically, its accuracy is more stable than batch norm in a wide + range of small batch sizes, if learning rate is adjusted linearly with batch sizes. Arguments @@ -360,10 +356,12 @@ class InstanceNormalization(GroupNormalization): References - [Layer Normalization](https://arxiv.org/abs/1607.06450) """ - def __init__(self,**kwargs): + + def __init__(self, **kwargs): if "groups" in kwargs: - tf.logging.warning("The given value for groups will be overwritten.") + tf.logging.warning( + "The given value for groups will be overwritten.") - kwargs["groups"]=-1 - super(InstanceNormalization,self).__init__(**kwargs) + kwargs["groups"] = -1 + super(InstanceNormalization, self).__init__(**kwargs) From b4613aecd05feb74f7b4464b2812fd01e9605eff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Kr=C3=B6ger?= Date: Thu, 7 Mar 2019 20:54:02 +0100 Subject: [PATCH 24/26] Update normalizations.py removed wrong documentation --- tensorflow_addons/layers/python/normalizations.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 579fb026ad..239e77245d 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -58,9 +58,6 @@ class GroupNormalization(Layer): If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. beta_initializer: Initializer for the beta weight. gamma_initializer: Initializer for the gamma weight. beta_regularizer: Optional regularizer for the beta weight. @@ -285,9 +282,6 @@ class LayerNormalization(GroupNormalization): If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. beta_initializer: Initializer for the beta weight. gamma_initializer: Initializer for the gamma weight. beta_regularizer: Optional regularizer for the beta weight. @@ -335,9 +329,6 @@ class InstanceNormalization(GroupNormalization): If False, `beta` is ignored. scale: If True, multiply by `gamma`. If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. beta_initializer: Initializer for the beta weight. gamma_initializer: Initializer for the gamma weight. 
beta_regularizer: Optional regularizer for the beta weight. From fcd163935468ff182cd95246335211828c466b24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Moritz=20Kr=C3=B6ger?= Date: Thu, 7 Mar 2019 21:00:32 +0100 Subject: [PATCH 25/26] Update normalizations.py Removed explanation of layers. Will be added to colab --- .../layers/python/normalizations.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index 239e77245d..adc3929dcd 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -48,11 +48,7 @@ class GroupNormalization(Layer): groups: Integer, the number of groups for Group Normalization. Can be in the range [1, N] where N is the input dimension. The input dimension must be divisible by the number of groups. - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `BatchNormalization`. + axis: Integer, the axis that should be normalized. epsilon: Small float added to variance to avoid dividing by zero. center: If True, add offset of `beta` to normalized tensor. If False, `beta` is ignored. @@ -272,11 +268,7 @@ class LayerNormalization(GroupNormalization): with batch sizes. Arguments - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `BatchNormalization`. + axis: Integer, the axis that should be normalized. epsilon: Small float added to variance to avoid dividing by zero. center: If True, add offset of `beta` to normalized tensor. If False, `beta` is ignored. @@ -319,11 +311,7 @@ class InstanceNormalization(GroupNormalization): with batch sizes. Arguments - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `BatchNormalization`. + axis: Integer, the axis that should be normalized. epsilon: Small float added to variance to avoid dividing by zero. center: If True, add offset of `beta` to normalized tensor. If False, `beta` is ignored. 
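Taken together, the three layers added in this series are related purely by the choice of `groups`: with the exports added to `tensorflow_addons.layers` in the final patch, `GroupNormalization(groups=1)` behaves like `LayerNormalization()` and `GroupNormalization(groups=-1)` behaves like `InstanceNormalization()`. The snippet below is a minimal usage sketch of that relation, not part of the patches themselves; the input shape and the group count of 4 are illustrative only, and the comparisons rely on the default ones/zeros initializers for gamma and beta.

import numpy as np
import tensorflow as tf
from tensorflow_addons.layers import GroupNormalization
from tensorflow_addons.layers import InstanceNormalization
from tensorflow_addons.layers import LayerNormalization

# Illustrative 4D input: (batch, height, width, channels).
x = tf.random.normal((8, 32, 32, 16))

# groups must evenly divide the channel dimension (16 % 4 == 0).
group_norm = GroupNormalization(groups=4, axis=-1)
print(group_norm(x).shape)  # (8, 32, 32, 16), same shape as the input

# groups=1 normalizes over all non-batch axes -> LayerNormalization.
np.testing.assert_allclose(
    GroupNormalization(groups=1)(x).numpy(),
    LayerNormalization()(x).numpy(),
    rtol=1e-5, atol=1e-6)

# groups=-1 uses one group per channel -> InstanceNormalization.
np.testing.assert_allclose(
    GroupNormalization(groups=-1)(x).numpy(),
    InstanceNormalization()(x).numpy(),
    rtol=1e-5, atol=1e-6)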
From 429ded25e67b1c8abe4a5c26c6204cf437fbe7de Mon Sep 17 00:00:00 2001 From: Sean Morgan Date: Sat, 9 Mar 2019 14:55:27 -0500 Subject: [PATCH 26/26] * Standardize formatting with project * Remove tf.logging as part of TF2 * Add normaliztion layers to init * Update READMEs --- README.md | 3 + tensorflow_addons/layers/README.md | 3 + tensorflow_addons/layers/__init__.py | 3 + .../layers/python/normalizations.py | 171 ++++++++++-------- .../layers/python/normalizations_test.py | 125 ++++++------- 5 files changed, 166 insertions(+), 139 deletions(-) diff --git a/README.md b/README.md index c165bf06e9..4a1e2fc174 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,9 @@ developments that cannot be integrated into core TensorFlow |:----------------------- |:----------- |:---------------------------- | | tfa.activations | Sparsemax | https://arxiv.org/abs/1602.02068 | | tfa.image | transform | | +| tfa.layers | GroupNormalization | https://arxiv.org/abs/1803.08494 | +| tfa.layers | InstanceNormalization | https://arxiv.org/abs/1607.08022 | +| tfa.layers | LayerNormalization | https://arxiv.org/abs/1607.06450 | | tfa.layers | Maxout | https://arxiv.org/abs/1302.4389 | | tfa.layers | PoinareNormalize | https://arxiv.org/abs/1705.08039 | | tfa.layers | WeightNormalization | https://arxiv.org/abs/1602.07868 | diff --git a/tensorflow_addons/layers/README.md b/tensorflow_addons/layers/README.md index c9832c87c1..9e34f0ac5d 100644 --- a/tensorflow_addons/layers/README.md +++ b/tensorflow_addons/layers/README.md @@ -3,6 +3,9 @@ ## Contents | Layer | Reference | |:----------------------- |:-----------------------------| +| GroupNormalization | https://arxiv.org/abs/1803.08494 | +| InstanceNormalization | https://arxiv.org/abs/1607.08022 | +| LayerNormalization | https://arxiv.org/abs/1607.06450 | | Maxout | https://arxiv.org/abs/1302.4389 | | PoinareNormalize | https://arxiv.org/abs/1705.08039 | | WeightNormalization | https://arxiv.org/abs/1602.07868 | diff --git a/tensorflow_addons/layers/__init__.py b/tensorflow_addons/layers/__init__.py index 0e06709ac7..c5e0497726 100644 --- a/tensorflow_addons/layers/__init__.py +++ b/tensorflow_addons/layers/__init__.py @@ -19,6 +19,9 @@ from __future__ import print_function from tensorflow_addons.layers.python.maxout import Maxout +from tensorflow_addons.layers.python.normalizations import GroupNormalization +from tensorflow_addons.layers.python.normalizations import InstanceNormalization +from tensorflow_addons.layers.python.normalizations import LayerNormalization from tensorflow_addons.layers.python.poincare import PoincareNormalize from tensorflow_addons.layers.python.sparsemax import Sparsemax from tensorflow_addons.layers.python.wrappers import WeightNormalization diff --git a/tensorflow_addons/layers/python/normalizations.py b/tensorflow_addons/layers/python/normalizations.py index adc3929dcd..2a07a3d802 100644 --- a/tensorflow_addons/layers/python/normalizations.py +++ b/tensorflow_addons/layers/python/normalizations.py @@ -11,21 +11,20 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# ============================================================================= # Orginal implementation from keras_contrib/layer/normalization +# ============================================================================= +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import logging import tensorflow as tf -from tensorflow.keras import backend as K -from tensorflow.keras import constraints -from tensorflow.keras import initializers -from tensorflow.keras import regularizers -from tensorflow.keras.layers import InputSpec -from tensorflow.keras.layers import Layer -from tensorflow.python.ops import nn +from tensorflow_addons.utils.python import keras_utils -class GroupNormalization(Layer): +@keras_utils.register_keras_custom_object +class GroupNormalization(tf.keras.layers.Layer): """Group normalization layer. Group Normalization divides the channels into groups and computes @@ -35,8 +34,8 @@ class GroupNormalization(Layer): with batch sizes. Relation to Layer Normalization: - If the number of groups is set to 1, then this operation becomes identical to - Layer Normalization. + If the number of groups is set to 1, then this operation becomes identical + to Layer Normalization. Relation to Instance Normalization: If the number of groups is set to the @@ -92,17 +91,17 @@ def __init__(self, self.epsilon = epsilon self.center = center self.scale = scale - self.beta_initializer = initializers.get(beta_initializer) - self.gamma_initializer = initializers.get(gamma_initializer) - self.beta_regularizer = regularizers.get(beta_regularizer) - self.gamma_regularizer = regularizers.get(gamma_regularizer) - self.beta_constraint = constraints.get(beta_constraint) - self.gamma_constraint = constraints.get(gamma_constraint) + self.beta_initializer = tf.keras.initializers.get(beta_initializer) + self.gamma_initializer = tf.keras.initializers.get(gamma_initializer) + self.beta_regularizer = tf.keras.regularizers.get(beta_regularizer) + self.gamma_regularizer = tf.keras.regularizers.get(gamma_regularizer) + self.beta_constraint = tf.keras.constraints.get(beta_constraint) + self.gamma_constraint = tf.keras.constraints.get(gamma_constraint) self._check_axis() def build(self, input_shape): - self._check_if_input_shape_is_None(input_shape) + self._check_if_input_shape_is_none(input_shape) self._set_number_of_groups_for_instance_norm(input_shape) self._check_size_of_dimensions(input_shape) self._create_input_spec(input_shape) @@ -114,32 +113,43 @@ def build(self, input_shape): def call(self, inputs): - input_shape = K.int_shape(inputs) - tensor_input_shape = K.shape(inputs) + input_shape = tf.keras.backend.int_shape(inputs) + tensor_input_shape = tf.shape(inputs) reshaped_inputs, group_shape = self._reshape_into_groups( inputs, input_shape, tensor_input_shape) - normalized_inputs = self._apply_normalization( - reshaped_inputs, input_shape) + normalized_inputs = self._apply_normalization(reshaped_inputs, + input_shape) - outputs = K.reshape(normalized_inputs, tensor_input_shape) + outputs = tf.reshape(normalized_inputs, tensor_input_shape) return outputs def get_config(self): config = { - 'groups': self.groups, - 'axis': self.axis, - 'epsilon': self.epsilon, - 'center': self.center, - 'scale': self.scale, - 'beta_initializer': initializers.serialize(self.beta_initializer), - 'gamma_initializer': initializers.serialize(self.gamma_initializer), - 'beta_regularizer': regularizers.serialize(self.beta_regularizer), - 'gamma_regularizer': 
regularizers.serialize(self.gamma_regularizer), - 'beta_constraint': constraints.serialize(self.beta_constraint), - 'gamma_constraint': constraints.serialize(self.gamma_constraint) + 'groups': + self.groups, + 'axis': + self.axis, + 'epsilon': + self.epsilon, + 'center': + self.center, + 'scale': + self.scale, + 'beta_initializer': + tf.keras.initializers.serialize(self.beta_initializer), + 'gamma_initializer': + tf.keras.initializers.serialize(self.gamma_initializer), + 'beta_regularizer': + tf.keras.regularizers.serialize(self.beta_regularizer), + 'gamma_regularizer': + tf.keras.regularizers.serialize(self.gamma_regularizer), + 'beta_constraint': + tf.keras.constraints.serialize(self.beta_constraint), + 'gamma_constraint': + tf.keras.constraints.serialize(self.gamma_constraint) } base_config = super(GroupNormalization, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -152,25 +162,27 @@ def _reshape_into_groups(self, inputs, input_shape, tensor_input_shape): group_shape = [tensor_input_shape[i] for i in range(len(input_shape))] group_shape[self.axis] = input_shape[self.axis] // self.groups group_shape.insert(1, self.groups) - group_shape = K.stack(group_shape) - reshaped_inputs = K.reshape(inputs, group_shape) + group_shape = tf.stack(group_shape) + reshaped_inputs = tf.reshape(inputs, group_shape) return reshaped_inputs, group_shape def _apply_normalization(self, reshaped_inputs, input_shape): - group_shape = K.int_shape(reshaped_inputs) + group_shape = tf.keras.backend.int_shape(reshaped_inputs) group_reduction_axes = list(range(len(group_shape))) - # Remember the ordering of the tensor is [batch, group , steps]. Jump the first 2 to calculate the variance and the mean - mean, variance = nn.moments(reshaped_inputs, group_reduction_axes[2:], - keep_dims=True) + # Remember the ordering of the tensor is [batch, group , steps]. 
Jump + # the first 2 to calculate the variance and the mean + mean, variance = tf.nn.moments( + reshaped_inputs, group_reduction_axes[2:], keepdims=True) gamma, beta = self._get_reshaped_weights(input_shape) - normalized_inputs = nn.batch_normalization(reshaped_inputs, - mean=mean, - variance=variance, - scale=gamma, - offset=beta, - variance_epsilon=self.epsilon) + normalized_inputs = tf.nn.batch_normalization( + reshaped_inputs, + mean=mean, + variance=variance, + scale=gamma, + offset=beta, + variance_epsilon=self.epsilon) return normalized_inputs def _get_reshaped_weights(self, input_shape): @@ -178,13 +190,13 @@ def _get_reshaped_weights(self, input_shape): gamma = None beta = None if self.scale: - gamma = K.reshape(self.gamma, broadcast_shape) + gamma = tf.reshape(self.gamma, broadcast_shape) if self.center: - beta = K.reshape(self.beta, broadcast_shape) + beta = tf.reshape(self.beta, broadcast_shape) return gamma, beta - def _check_if_input_shape_is_None(self, input_shape): + def _check_if_input_shape_is_none(self, input_shape): dim = input_shape[self.axis] if dim is None: raise ValueError('Axis ' + str(self.axis) + ' of ' @@ -202,26 +214,27 @@ def _check_size_of_dimensions(self, input_shape): dim = input_shape[self.axis] if dim < self.groups: - raise ValueError('Number of groups (' + str(self.groups) + ') cannot be ' - 'more than the number of channels (' + - str(dim) + ').') + raise ValueError( + 'Number of groups (' + str(self.groups) + ') cannot be ' + 'more than the number of channels (' + str(dim) + ').') if dim % self.groups != 0: - raise ValueError('Number of groups (' + str(self.groups) + ') must be a ' - 'multiple of the number of channels (' + - str(dim) + ').') + raise ValueError( + 'Number of groups (' + str(self.groups) + ') must be a ' + 'multiple of the number of channels (' + str(dim) + ').') def _check_axis(self): if self.axis == 0: raise ValueError( - "You are trying to normalize your batch axis. Do you want to use tf.layer.batch_normalization instead") + "You are trying to normalize your batch axis. Do you want to " + "use tf.layer.batch_normalization instead") def _create_input_spec(self, input_shape): dim = input_shape[self.axis] - self.input_spec = InputSpec(ndim=len(input_shape), - axes={self.axis: dim}) + self.input_spec = tf.keras.layers.InputSpec( + ndim=len(input_shape), axes={self.axis: dim}) def _add_gamma_weight(self, input_shape): @@ -229,11 +242,12 @@ def _add_gamma_weight(self, input_shape): shape = (dim,) if self.scale: - self.gamma = self.add_weight(shape=shape, - name='gamma', - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint) + self.gamma = self.add_weight( + shape=shape, + name='gamma', + initializer=self.gamma_initializer, + regularizer=self.gamma_regularizer, + constraint=self.gamma_constraint) else: self.gamma = None @@ -243,11 +257,12 @@ def _add_beta_weight(self, input_shape): shape = (dim,) if self.center: - self.beta = self.add_weight(shape=shape, - name='beta', - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint) + self.beta = self.add_weight( + shape=shape, + name='beta', + initializer=self.beta_initializer, + regularizer=self.beta_regularizer, + constraint=self.beta_constraint) else: self.beta = None @@ -258,6 +273,7 @@ def _create_broadcast_shape(self, input_shape): return broadcast_shape +@keras_utils.register_keras_custom_object class LayerNormalization(GroupNormalization): """Layer normalization layer. 
@@ -295,19 +311,19 @@ class LayerNormalization(GroupNormalization): def __init__(self, **kwargs): if "groups" in kwargs: - tf.logging.warning( - "The given value for groups will be overwritten.") + logging.warning("The given value for groups will be overwritten.") kwargs["groups"] = 1 super(LayerNormalization, self).__init__(**kwargs) +@keras_utils.register_keras_custom_object class InstanceNormalization(GroupNormalization): """Instance normalization layer. - Instance Normalization is an specific case of ```GroupNormalization```since it - normalizes all features of one channel. The Groupsize is equal to the channel size. - Empirically, its accuracy is more stable than batch norm in a wide - range of small batch sizes, if learning rate is adjusted linearly + Instance Normalization is an specific case of ```GroupNormalization```since + it normalizes all features of one channel. The Groupsize is equal to the + channel size. Empirically, its accuracy is more stable than batch norm in a + wide range of small batch sizes, if learning rate is adjusted linearly with batch sizes. Arguments @@ -333,14 +349,13 @@ class InstanceNormalization(GroupNormalization): Same shape as input. References - - [Layer Normalization](https://arxiv.org/abs/1607.06450) + - [Instance Normalization: The Missing Ingredient for Fast Stylization] + (https://arxiv.org/abs/1607.08022) """ def __init__(self, **kwargs): - if "groups" in kwargs: - tf.logging.warning( - "The given value for groups will be overwritten.") + logging.warning("The given value for groups will be overwritten.") kwargs["groups"] = -1 super(InstanceNormalization, self).__init__(**kwargs) diff --git a/tensorflow_addons/layers/python/normalizations_test.py b/tensorflow_addons/layers/python/normalizations_test.py index 3b83d70fb6..f3bf95afae 100644 --- a/tensorflow_addons/layers/python/normalizations_test.py +++ b/tensorflow_addons/layers/python/normalizations_test.py @@ -12,32 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================= +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function import numpy as np -import scipy as scipy import tensorflow as tf -from tensorflow import keras as keras + from tensorflow_addons.layers.python.normalizations import GroupNormalization from tensorflow_addons.layers.python.normalizations import InstanceNormalization from tensorflow_addons.layers.python.normalizations import LayerNormalization -from tensorflow.python.framework import test_util as tf_test_util -from tensorflow.python.keras import keras_parameterized -from tensorflow.python.keras import testing_utils -from tensorflow.python.keras.layers import normalization -from tensorflow.python.training.rmsprop import RMSPropOptimizer -from tensorflow.python.platform import test -from tensorflow.python.training import gradient_descent +from tensorflow_addons.utils.python import test_utils -class normalization_test(test.TestCase): +class NormalizationTest(tf.test.TestCase): - # ------------Tests to ensure proper inheritance. If these suceed you can test for Instance norm and Layernorm by setting Groupnorm groups = -1 or 1 + # ------------Tests to ensure proper inheritance. 
If these suceed you can + # test for Instance norm and Layernorm by setting Groupnorm groups = -1 or 1 def test_inheritance(self): self.assertTrue(issubclass(LayerNormalization, GroupNormalization)) self.assertTrue(issubclass(InstanceNormalization, GroupNormalization)) self.assertTrue(LayerNormalization.build == GroupNormalization.build) - self.assertTrue(InstanceNormalization.build == - GroupNormalization.build) + self.assertTrue( + InstanceNormalization.build == GroupNormalization.build) self.assertTrue(LayerNormalization.call == GroupNormalization.call) self.assertTrue(InstanceNormalization.call == GroupNormalization.call) @@ -46,11 +43,11 @@ def test_groups_after_init(self): self.assertTrue(layers.groups == -1) layers = LayerNormalization() self.assertTrue(layers.groups == 1) -# ----------------------------------------------------------------------------------------------------------------------------------------- + + # ------------------------------------------------------------------------------ def test_reshape(self): def run_reshape_test(axis, group, input_shape, expected_shape): - group_layer = GroupNormalization(groups=group, axis=axis) group_layer._set_number_of_groups_for_instance_norm(input_shape) @@ -93,7 +90,8 @@ def test_picture_input(self): self._test_random_shape_on_all_axis_except_batch( shape, groups, center, scale) - def _test_random_shape_on_all_axis_except_batch(self, shape, groups, center, scale): + def _test_random_shape_on_all_axis_except_batch(self, shape, groups, + center, scale): inputs = tf.random.normal((shape)) for axis in range(1, len(shape)): self._test_specific_layer(inputs, axis, groups, center, scale) @@ -105,7 +103,7 @@ def _test_specific_layer(self, inputs, axis, groups, center, scale): # Get Output from Keras model layer = GroupNormalization( axis=axis, groups=groups, center=center, scale=scale) - model = keras.models.Sequential() + model = tf.keras.models.Sequential() model.add(layer) outputs = model.predict(inputs) self.assertFalse(np.isnan(outputs).any()) @@ -120,10 +118,14 @@ def _test_specific_layer(self, inputs, axis, groups, center, scale): reshaped_inputs = np.reshape(np_inputs, tuple(reshaped_dims)) # Calculate mean and variance - mean = np.mean(reshaped_inputs, axis=tuple( - range(2, len(reshaped_dims))), keepdims=True) - variance = np.var(reshaped_inputs, axis=tuple( - range(2, len(reshaped_dims))), keepdims=True) + mean = np.mean( + reshaped_inputs, + axis=tuple(range(2, len(reshaped_dims))), + keepdims=True) + variance = np.var( + reshaped_inputs, + axis=tuple(range(2, len(reshaped_dims))), + keepdims=True) # Get gamma and beta initalized by layer gamma, beta = layer._get_reshaped_weights(input_shape) @@ -143,20 +145,21 @@ def _test_specific_layer(self, inputs, axis, groups, center, scale): def _create_and_fit_Sequential_model(self, layer, shape): # Helperfunction for quick evaluation - model = keras.models.Sequential() + model = tf.keras.models.Sequential() model.add(layer) - model.add(keras.layers.Dense(32)) - model.add(keras.layers.Dense(1)) + model.add(tf.keras.layers.Dense(32)) + model.add(tf.keras.layers.Dense(1)) - model.compile(optimizer=RMSPropOptimizer(0.01), - loss="categorical_crossentropy") + model.compile( + optimizer=tf.keras.optimizers.RMSprop(0.01), + loss="categorical_crossentropy") layer_shape = (10,) + shape input_batch = np.random.rand(*layer_shape) output_batch = np.random.rand(*(10, 1)) model.fit(x=input_batch, y=output_batch, epochs=1, batch_size=1) return model - @tf_test_util.run_in_graph_and_eager_modes + 
@test_utils.run_in_graph_and_eager_modes def test_weights(self): # Check if weights get initialized correctly layer = GroupNormalization(groups=1, scale=False, center=False) @@ -180,17 +183,19 @@ def test_apply_normalization(self): expected_shape = (1, 2, 2) reshaped_inputs = tf.constant([[[2.0, 2.0], [3.0, 3.0]]]) layer = GroupNormalization(groups=2, axis=1, scale=False, center=False) - normalized_input = layer._apply_normalization( - reshaped_inputs, input_shape) - self.assertTrue(tf.reduce_all( - tf.equal(normalized_input, tf.constant([[[0.0, 0.0], [0.0, 0.0]]])))) + normalized_input = layer._apply_normalization(reshaped_inputs, + input_shape) + self.assertTrue( + tf.reduce_all( + tf.equal(normalized_input, + tf.constant([[[0.0, 0.0], [0.0, 0.0]]])))) def test_axis_error(self): with self.assertRaises(ValueError): GroupNormalization(axis=0) - @tf_test_util.run_in_graph_and_eager_modes + @test_utils.run_in_graph_and_eager_modes def test_groupnorm_flat(self): # Check basic usage of groupnorm_flat # Testing for 1 == LayerNorm, 16 == GroupNorm, -1 == InstanceNorm @@ -203,33 +208,34 @@ def test_groupnorm_flat(self): self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) - @tf_test_util.run_in_graph_and_eager_modes + @test_utils.run_in_graph_and_eager_modes def test_layernorm_flat(self): # Check basic usage of layernorm - model = self._create_and_fit_Sequential_model( - LayerNormalization(), (64,)) + model = self._create_and_fit_Sequential_model(LayerNormalization(), + (64,)) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) - @tf_test_util.run_in_graph_and_eager_modes + @test_utils.run_in_graph_and_eager_modes def test_instancenorm_flat(self): # Check basic usage of instancenorm - model = self._create_and_fit_Sequential_model( - InstanceNormalization(), (64,)) + model = self._create_and_fit_Sequential_model(InstanceNormalization(), + (64,)) self.assertTrue(hasattr(model.layers[0], 'gamma')) self.assertTrue(hasattr(model.layers[0], 'beta')) - @tf_test_util.run_in_graph_and_eager_modes + @test_utils.run_in_graph_and_eager_modes def test_initializer(self): # Check if the initializer for gamma and beta is working correctly - layer = GroupNormalization(groups=32, - beta_initializer='random_normal', - beta_constraint='NonNeg', - gamma_initializer='random_normal', - gamma_constraint='NonNeg') + layer = GroupNormalization( + groups=32, + beta_initializer='random_normal', + beta_constraint='NonNeg', + gamma_initializer='random_normal', + gamma_constraint='NonNeg') model = self._create_and_fit_Sequential_model(layer, (64,)) @@ -237,38 +243,35 @@ def test_initializer(self): negativ = weights[weights < 0.0] self.assertTrue(len(negativ) == 0) - @tf_test_util.run_in_graph_and_eager_modes + @test_utils.run_in_graph_and_eager_modes def test_regularizations(self): layer = GroupNormalization( - gamma_regularizer='l1', - beta_regularizer='l1', - groups=4, - axis=2) + gamma_regularizer='l1', beta_regularizer='l1', groups=4, axis=2) layer.build((None, 4, 4)) self.assertEqual(len(layer.losses), 2) - max_norm = keras.constraints.max_norm + max_norm = tf.keras.constraints.max_norm layer = GroupNormalization( - gamma_constraint=max_norm, - beta_constraint=max_norm) + gamma_constraint=max_norm, beta_constraint=max_norm) layer.build((None, 3, 4)) self.assertEqual(layer.gamma.constraint, max_norm) self.assertEqual(layer.beta.constraint, max_norm) - @tf_test_util.run_in_graph_and_eager_modes + 
@test_utils.run_in_graph_and_eager_modes def test_groupnorm_conv(self): # Check if Axis is working for CONV nets # Testing for 1 == LayerNorm, 5 == GroupNorm, -1 == InstanceNorm groups = [-1, 5, 1] for i in groups: - model = keras.models.Sequential() - model.add(GroupNormalization( - axis=1, groups=i, input_shape=(20, 20, 3))) - model.add(keras.layers.Conv2D(5, (1, 1), padding='same')) - model.add(keras.layers.Flatten()) - model.add(keras.layers.Dense(1, activation='softmax')) - model.compile(optimizer=RMSPropOptimizer(0.01), loss='mse') + model = tf.keras.models.Sequential() + model.add( + GroupNormalization(axis=1, groups=i, input_shape=(20, 20, 3))) + model.add(tf.keras.layers.Conv2D(5, (1, 1), padding='same')) + model.add(tf.keras.layers.Flatten()) + model.add(tf.keras.layers.Dense(1, activation='softmax')) + model.compile( + optimizer=tf.keras.optimizers.RMSprop(0.01), loss='mse') x = np.random.randint(1000, size=(10, 20, 20, 3)) y = np.random.randint(1000, size=(10, 1)) a = model.fit(x=x, y=y, epochs=1) @@ -276,4 +279,4 @@ def test_groupnorm_conv(self): if __name__ == "__main__": - test.main() + tf.test.main()
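The numpy reference that `_test_specific_layer` checks against reduces to the group-normalization formula: split the channel axis into groups (mirroring `_reshape_into_groups`), take mean and variance over every axis except batch and group (mirroring the moments call in `_apply_normalization`), and normalize with the layer's default epsilon of 1e-5. A standalone sketch of that computation, with gamma and beta left at their default values of ones and zeros, could look as follows; the helper name `group_norm_reference` and the shapes are illustrative and are not part of the patch.

import numpy as np

def group_norm_reference(x, groups, axis=-1, epsilon=1e-5):
    # Split the channel axis into (groups, channels // groups), with the
    # group axis inserted at position 1, as the layer's reshape does.
    dims = list(x.shape)
    dims[axis] = dims[axis] // groups
    dims.insert(1, groups)
    grouped = np.reshape(x, dims)
    # Statistics over every axis except batch (0) and group (1).
    reduction_axes = tuple(range(2, len(dims)))
    mean = grouped.mean(axis=reduction_axes, keepdims=True)
    variance = grouped.var(axis=reduction_axes, keepdims=True)
    normalized = (grouped - mean) / np.sqrt(variance + epsilon)
    return normalized.reshape(x.shape)

x = np.random.randn(10, 30, 30, 4).astype(np.float32)
out = group_norm_reference(x, groups=2)
print(out.shape)  # (10, 30, 30, 4)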