From 90197f872a19dbdade99b409d7e1e3c1b7ff4d08 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Fri, 20 Sep 2019 03:12:05 +0800
Subject: [PATCH 01/12] add lisht kernel

---
 tensorflow_addons/activations/BUILD           |  14 +++
 tensorflow_addons/activations/README.md       |   5 +-
 tensorflow_addons/activations/__init__.py     |   1 +
 tensorflow_addons/activations/lisht.py        |  49 ++++++++
 tensorflow_addons/activations/lisht_test.py   |  91 +++++++++++++++
 .../custom_ops/activations/BUILD              |  26 +++++
 .../activations/cc/kernels/lisht_op.cc        |  79 +++++++++++++
 .../activations/cc/kernels/lisht_op.h         | 106 ++++++++++++++++++
 .../activations/cc/kernels/lisht_op_gpu.cu.cc |  38 +++++++
 .../custom_ops/activations/cc/ops/lisht_op.cc |  37 ++++++
 10 files changed, 444 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow_addons/activations/lisht.py
 create mode 100644 tensorflow_addons/activations/lisht_test.py
 create mode 100644 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
 create mode 100644 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
 create mode 100644 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
 create mode 100644 tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc

diff --git a/tensorflow_addons/activations/BUILD b/tensorflow_addons/activations/BUILD
index e5f9640bb4..e1885a8c40 100644
--- a/tensorflow_addons/activations/BUILD
+++ b/tensorflow_addons/activations/BUILD
@@ -8,6 +8,7 @@ py_library(
         "__init__.py",
         "gelu.py",
         "hardshrink.py",
+        "lisht.py",
         "sparsemax.py",
         "tanhshrink.py",
     ],
@@ -57,6 +58,19 @@ py_test(
     ],
 )

+py_test(
+    name = "lisht_test",
+    size = "medium",
+    srcs = [
+        "lisht_test.py",
+    ],
+    main = "lisht_test.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        ":activations",
+    ],
+)
+
 py_test(
     name = "tanhshrink_test",
     size = "medium",
diff --git a/tensorflow_addons/activations/README.md b/tensorflow_addons/activations/README.md
index d34c12b962..6e525d24ec 100644
--- a/tensorflow_addons/activations/README.md
+++ b/tensorflow_addons/activations/README.md
@@ -13,8 +13,9 @@
 |:----------|:-----------|:---------------------------------|
 | gelu | gelu | https://arxiv.org/abs/1606.08415 |
 | hardshrink| hardshrink | |
-| sparsemax | Sparsemax | https://arxiv.org/abs/1602.02068 |
-| tanhshrink | Tanhshrink | |
+| lisht | lisht | https://arxiv.org/abs/1901.05894 |
+| sparsemax | sparsemax | https://arxiv.org/abs/1602.02068 |
+| tanhshrink | tanhshrink | |

 ## Contribution Guidelines
diff --git a/tensorflow_addons/activations/__init__.py b/tensorflow_addons/activations/__init__.py
index 4208b57817..313a78a1e3 100644
--- a/tensorflow_addons/activations/__init__.py
+++ b/tensorflow_addons/activations/__init__.py
@@ -20,5 +20,6 @@

 from tensorflow_addons.activations.gelu import gelu
 from tensorflow_addons.activations.hardshrink import hardshrink
+from tensorflow_addons.activations.lisht import lisht
 from tensorflow_addons.activations.sparsemax import sparsemax
 from tensorflow_addons.activations.tanhshrink import tanhshrink
diff --git a/tensorflow_addons/activations/lisht.py b/tensorflow_addons/activations/lisht.py
new file mode 100644
index 0000000000..cbef569792
--- /dev/null
+++ b/tensorflow_addons/activations/lisht.py
@@ -0,0 +1,49 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+from tensorflow_addons.utils import keras_utils
+from tensorflow_addons.utils.resource_loader import get_path_to_datafile
+
+_activation_ops_so = tf.load_op_library(
+    get_path_to_datafile("custom_ops/activations/_activation_ops.so"))
+
+
+@keras_utils.register_keras_custom_object
+@tf.function
+def lisht(x):
+    """LiSHT: Non-Parametric Linearly Scaled Hyperbolic Tangent Activation Function.
+
+    Computes linearly scaled hyperbolic tangent (LiSHT): `x * tanh(x)`.
+
+    See [LiSHT: Non-Parametric Linearly Scaled Hyperbolic Tangent Activation Function for Neural Networks](https://arxiv.org/abs/1901.05894).
+
+    Args:
+        x: A `Tensor`. Must be one of the following types:
+            `float16`, `float32`, `float64`.
+    Returns:
+        A `Tensor`. Has the same type as `x`.
+    """
+    x = tf.convert_to_tensor(x)
+    return _activation_ops_so.addons_lisht(x)
+
+
+@tf.RegisterGradient("Addons>Lisht")
+def _lisht_grad(op, grad):
+    return _activation_ops_so.addons_lisht_grad(grad, op.inputs[0])
diff --git a/tensorflow_addons/activations/lisht_test.py b/tensorflow_addons/activations/lisht_test.py
new file mode 100644
index 0000000000..c21fbafb13
--- /dev/null
+++ b/tensorflow_addons/activations/lisht_test.py
@@ -0,0 +1,91 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf +from tensorflow_addons.activations import lisht +from tensorflow_addons.utils import test_utils + + +def _ref_lisht(x): + x = tf.convert_to_tensor(x) + return x * tf.tanh(x) + + +@test_utils.run_all_in_graph_and_eager_modes +class LishtTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.named_parameters(("float16", np.float16), + ("float32", np.float32), + ("float64", np.float64)) + def test_lisht(self, dtype): + x = (np.random.rand(2, 3, 4) * 5.0 - 2.5).astype(dtype) + self.assertAllCloseAccordingToType(lisht(x), _ref_lisht(x)) + + @parameterized.named_parameters(("float16", np.float16), + ("float32", np.float32), + ("float64", np.float64)) + def test_gradients(self, dtype): + x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype) + + with tf.GradientTape(persistent=True) as tape: + tape.watch(x) + y_ref = _ref_lisht(x) + y = lisht(x) + grad_ref = tape.gradient(y_ref, x) + grad = tape.gradient(y, x) + self.assertAllCloseAccordingToType(grad, grad_ref) + + @parameterized.named_parameters(("float32", np.float32), + ("float64", np.float64)) + def test_theoretical_gradients(self, dtype): + # Only test theoretical gradients for float32 and float64 + # because of the instability of float16 while computing jacobian + x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype) + + theoretical, numerical = tf.test.compute_gradient(lisht, [x]) + self.assertAllCloseAccordingToType( + theoretical, numerical, atol=1e-4) + + def test_unknown_shape(self): + fn = lisht.get_concrete_function( + tf.TensorSpec(shape=None, dtype=tf.float32)) + + for shape in [(1,), (1, 2), (1, 2, 3), (1, 2, 3, 4)]: + x = tf.ones(shape=shape, dtype=tf.float32) + self.assertAllClose(fn(x), lisht(x)) + + def test_serialization(self): + ref_fn = lisht + config = tf.keras.activations.serialize(ref_fn) + fn = tf.keras.activations.deserialize(config) + self.assertEqual(fn, ref_fn) + + def test_serialization_with_layers(self): + layer = tf.keras.layers.Dense(3, activation=lisht) + config = tf.keras.layers.serialize(layer) + deserialized_layer = tf.keras.layers.deserialize(config) + self.assertEqual(deserialized_layer.__class__.__name__, + layer.__class__.__name__) + self.assertEqual(deserialized_layer.activation.__name__, "lisht") + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow_addons/custom_ops/activations/BUILD b/tensorflow_addons/custom_ops/activations/BUILD index b61d7a6fa3..567b06250d 100644 --- a/tensorflow_addons/custom_ops/activations/BUILD +++ b/tensorflow_addons/custom_ops/activations/BUILD @@ -49,6 +49,28 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "lisht_op_gpu", + srcs = [ + "cc/kernels/lisht_op.h", + "cc/kernels/lisht_op_gpu.cu.cc", + ], + copts = if_cuda_is_configured([ + "-DGOOGLE_CUDA=1", + "-x cuda", + "-nvcc_options=relaxed-constexpr", + "-nvcc_options=ftz=true", + ]), + deps = [ + "@local_config_tf//:libtensorflow_framework", + "@local_config_tf//:tf_header_lib", + ] + if_cuda_is_configured([ + "@local_config_cuda//cuda:cuda_libs", + "@local_config_cuda//cuda:cuda_headers", + ]), + alwayslink = 1, +) + cc_library( name = "tanhshrink_op_gpu", srcs = [ @@ -78,10 +100,13 @@ cc_binary( "cc/kernels/gelu_op.h", "cc/kernels/hardshrink_op.cc", 
"cc/kernels/hardshrink_op.h", + "cc/kernels/lisht_op.cc", + "cc/kernels/lisht_op.h", "cc/kernels/tanhshrink_op.cc", "cc/kernels/tanhshrink_op.h", "cc/ops/gelu_op.cc", "cc/ops/hardshrink_op.cc", + "cc/ops/lisht_op.cc", "cc/ops/tanhshrink_op.cc", ], copts = [ @@ -96,6 +121,7 @@ cc_binary( ] + if_cuda_is_configured([ ":gelu_op_gpu", ":hardshrink_op_gpu", + ":lisht_op_gpu", ":tanhshrink_op_gpu", ]), ) diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc new file mode 100644 index 0000000000..7c13283f0e --- /dev/null +++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc @@ -0,0 +1,79 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include "tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { +namespace addons { + +using CPUDevice = Eigen::ThreadPoolDevice; + +#define REGISTER_LISHT_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Addons>Lisht").Device(DEVICE_CPU).TypeConstraint("T"), \ + LishtOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Addons>LishtGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + LishtGradOp); + +// Lisht only makes sense with floating points. +TF_CALL_GPU_NUMBER_TYPES(REGISTER_LISHT_KERNELS); +#undef REGISTER_LISHT_KERNELS + +#if GOOGLE_CUDA + +using GPUDevice = Eigen::GpuDevice; + +// Forward declarations of the functor specializations for GPU. +namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void Lisht::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor features, \ + typename TTypes::Tensor activations); \ + extern template struct Lisht; \ + \ + template <> \ + void LishtGrad::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor gradients, \ + typename TTypes::ConstTensor features, \ + typename TTypes::Tensor backprops); \ + extern template struct LishtGrad; + +TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); +#undef DECLARE_GPU_SPEC +} // namespace functor + +// Registration of the GPU implementations. 
+#define REGISTER_LISHT_GPU_KERNELS(type)                                     \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("Addons>Lisht").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
+      LishtOp<GPUDevice, type>);                                             \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("Addons>LishtGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
+      LishtGradOp<GPUDevice, type>);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_LISHT_GPU_KERNELS);
+#undef REGISTER_LISHT_GPU_KERNELS
+
+#endif  // GOOGLE_CUDA
+
+}  // end namespace addons
+}  // namespace tensorflow
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
new file mode 100644
index 0000000000..e73254f97e
--- /dev/null
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -0,0 +1,106 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_ADDONS_ACTIVATIONS_KERNELS_LISHT_OP_H_
+#define TENSORFLOW_ADDONS_ACTIVATIONS_KERNELS_LISHT_OP_H_
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+namespace addons {
+namespace functor {
+
+// Functor used by LishtOp to do the computations.
+template <typename Device, typename T>
+struct Lisht {
+  // Computes Lisht activation.
+  //
+  // features: any shape.
+  // activations: same shape as "features".
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor features,
+                  typename TTypes<T>::Tensor activations) {
+    activations.device(d) = features * features.tanh();
+  }
+};
+
+// Functor used by LishtGradOp to do the computations.
+template <typename Device, typename T>
+struct LishtGrad {
+  // Computes LishtGrad backprops.
+  //
+  // gradients: gradients backpropagated to the List op.
+  // features: the inputs that were passed to the List op.
+  // backprops: gradients to backpropagate to the List inputs.
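+  //
+  // With g = tanh(x), the derivative of lisht(x) = x * tanh(x) is
+  // tanh(x) + x * (1 - tanh(x)^2), i.e. x + g - x * g^2
+  // (equivalently g + x * (1 - g^2)).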
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
+                  typename TTypes<T>::ConstTensor features,
+                  typename TTypes<T>::Tensor backprops) {
+      const auto g = features.tanh();
+      backprops.device(d) = gradients * (features + g - features * g.square());
+  }
+};
+
+}  // namespace functor
+
+template <typename Device, typename T>
+class LishtOp : public UnaryElementWiseOp<T, LishtOp<Device, T>> {
+ public:
+  explicit LishtOp(OpKernelConstruction* context)
+      : UnaryElementWiseOp<T, LishtOp<Device, T>>::UnaryElementWiseOp(context) {
+  }
+
+  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
+    functor::Lisht<Device, T> functor;
+    functor(context->eigen_device<Device>(), input.flat<T>(),
+            output->flat<T>());
+  }
+};
+
+template <typename Device, typename T>
+class LishtGradOp : public BinaryElementWiseOp<T, LishtGradOp<Device, T>> {
+ public:
+  explicit LishtGradOp(OpKernelConstruction* context)
+      : BinaryElementWiseOp<T, LishtGradOp<Device, T>>::BinaryElementWiseOp(
+            context) {
+  }
+
+  void OperateNoTemplate(OpKernelContext* context, const Tensor& g,
+                         const Tensor& a, Tensor* output);
+
+  template <int NDIMS>
+  void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a,
+               Tensor* output) {
+    OperateNoTemplate(context, g, a, output);
+  }
+};
+
+template <typename Device, typename T>
+void LishtGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
+                                        const Tensor& g, const Tensor& a,
+                                        Tensor* output) {
+  functor::LishtGrad<Device, T> functor;
+  functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(),
+          output->flat<T>());
+}
+
+}  // namespace addons
+}  // namespace tensorflow
+
+#undef EIGEN_USE_THREADS
+
+#endif  // TENSORFLOW_ADDONS_ACTIVATIONS_KERNELS_LISHT_OP_H_
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
new file mode 100644
index 0000000000..c8646a336e
--- /dev/null
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
@@ -0,0 +1,38 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "third_party/eigen3/Eigen/Core"
+
+namespace tensorflow {
+namespace addons {
+
+using GPUDevice = Eigen::GpuDevice;
+
+#define DEFINE_GPU_KERNELS(T)                    \
+  template struct functor::Lisht<GPUDevice, T>;  \
+  template struct functor::LishtGrad<GPUDevice, T>;
+
+TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
+
+}  // end namespace addons
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
new file mode 100644
index 0000000000..0cbb7d3870
--- /dev/null
+++ b/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
@@ -0,0 +1,37 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+namespace addons {
+
+REGISTER_OP("Addons>Lisht")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("T: {half, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("Addons>LishtGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Output("backprops: T")
+    .Attr("T: {half, float, double}")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
+
+}  // end namespace addons
+}  // namespace tensorflow
From 3f8e8c93a92d54dbc1664a6cd57d64bf714def32 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Fri, 20 Sep 2019 03:17:05 +0800
Subject: [PATCH 02/12] update README

---
 tensorflow_addons/activations/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow_addons/activations/README.md b/tensorflow_addons/activations/README.md
index 6e525d24ec..ede5eb30fb 100644
--- a/tensorflow_addons/activations/README.md
+++ b/tensorflow_addons/activations/README.md
@@ -4,9 +4,10 @@
 | Submodule | Maintainers | Contact Info |
 |:----------|:--------------------------|:-----------------------------------------|
 | gelu | @AakashKumarNain @WindQAQ | aakashnain@outlook.com windqaq@gmail.com |
-| hardshrink| @WindQAQ | windqaq@gmail.com
+| hardshrink| @WindQAQ | windqaq@gmail.com |
+| lisht | @WindQAQ | windqaq@gmail.com |
 | sparsemax | @AndreasMadsen | amwwebdk+github@gmail.com |
-| tanhshrink | @fsx950223 | fsx950223@gmail.com |
+| tanhshrink| @fsx950223 | fsx950223@gmail.com |

 ## Contents
 | Submodule | Activation | Reference |
@@ -15,7 +16,7 @@
 | hardshrink| hardshrink | |
 | lisht | lisht | https://arxiv.org/abs/1901.05894 |
 | sparsemax | sparsemax | https://arxiv.org/abs/1602.02068 |
-| tanhshrink | tanhshrink | |
+| tanhshrink| tanhshrink | |

 ## Contribution Guidelines

@@ -23,7 +24,6 @@
 In order to conform with the current API standard, all activations
 must:
  * Be a `tf.function`.
- * Have the signature `fn(input, axis=-1, name=None)`.
  * [Register as a keras global object](https://github.com/tensorflow/addons/blob/master/tensorflow_addons/utils/python/keras_utils.py)
    so it can be serialized properly.
  * Add the addon to the `py_library` in this sub-package's BUILD file.
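For reference alongside the kernels above, a minimal pure-TensorFlow sketch of the same forward and backward computation (illustrative only, not part of the patch series; the names lisht_ref and lisht_grad_ref are hypothetical):

import tensorflow as tf


def lisht_ref(x):
    """Forward pass, mirroring functor::Lisht: x * tanh(x)."""
    x = tf.convert_to_tensor(x)
    return x * tf.tanh(x)


def lisht_grad_ref(dy, x):
    """Backward pass, mirroring functor::LishtGrad as written in patch 01."""
    # With g = tanh(x): d/dx [x * tanh(x)] = tanh(x) + x * (1 - tanh(x)^2)
    #                                      = x + g - x * g^2.
    g = tf.tanh(x)
    return dy * (x + g - x * tf.square(g))


x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0])
print(lisht_ref(x))   # ~[1.9280552, 0.7615942, 0.0, 0.7615942, 1.9280552]
print(lisht_grad_ref(tf.ones_like(x), x))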
From c1e599df64d76eb36d39b86e361900cf1cd9f2f9 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Fri, 20 Sep 2019 09:23:15 +0800
Subject: [PATCH 03/12] format code

---
 tensorflow_addons/activations/lisht_test.py      |  3 +--
 .../activations/cc/kernels/lisht_op.cc           | 18 +++++++++---------
 .../activations/cc/kernels/lisht_op.h            | 11 +++++------
 .../activations/cc/kernels/lisht_op_gpu.cu.cc    |  2 +-
 4 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/tensorflow_addons/activations/lisht_test.py b/tensorflow_addons/activations/lisht_test.py
index c21fbafb13..ca337ef569 100644
--- a/tensorflow_addons/activations/lisht_test.py
+++ b/tensorflow_addons/activations/lisht_test.py
@@ -61,8 +61,7 @@ def test_theoretical_gradients(self, dtype):
         x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype)

         theoretical, numerical = tf.test.compute_gradient(lisht, [x])
-        self.assertAllCloseAccordingToType(
-            theoretical, numerical, atol=1e-4)
+        self.assertAllCloseAccordingToType(theoretical, numerical, atol=1e-4)

     def test_unknown_shape(self):
         fn = lisht.get_concrete_function(
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
index 7c13283f0e..686b5adf70 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
@@ -26,10 +26,10 @@ namespace addons {
 using CPUDevice = Eigen::ThreadPoolDevice;

 #define REGISTER_LISHT_KERNELS(type)                                         \
-  REGISTER_KERNEL_BUILDER(                                                    \
+  REGISTER_KERNEL_BUILDER(                                                   \
       Name("Addons>Lisht").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
       LishtOp<CPUDevice, type>);                                             \
-  REGISTER_KERNEL_BUILDER(                                                    \
+  REGISTER_KERNEL_BUILDER(                                                   \
       Name("Addons>LishtGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
       LishtGradOp<CPUDevice, type>);

@@ -45,15 +45,15 @@ using GPUDevice = Eigen::GpuDevice;
 namespace functor {
 #define DECLARE_GPU_SPEC(T)                                          \
   template <>                                                        \
-  void Lisht<GPUDevice, T>::operator()(                              \
+  void Lisht<GPUDevice, T>::operator()(                             \
       const GPUDevice& d, typename TTypes<T>::ConstTensor features,  \
-      typename TTypes<T>::Tensor activations);                       \
-  extern template struct Lisht<GPUDevice, T>;                        \
+      typename TTypes<T>::Tensor activations);                      \
+  extern template struct Lisht<GPUDevice, T>;                       \
                                                                      \
   template <>                                                        \
-  void LishtGrad<GPUDevice, T>::operator()(                          \
+  void LishtGrad<GPUDevice, T>::operator()(                         \
       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
-      typename TTypes<T>::ConstTensor features,                      \
+      typename TTypes<T>::ConstTensor features,                     \
       typename TTypes<T>::Tensor backprops);                         \
   extern template struct LishtGrad<GPUDevice, T>;

@@ -63,10 +63,10 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
 #undef DECLARE_GPU_SPEC
 }  // namespace functor

 // Registration of the GPU implementations.
 #define REGISTER_LISHT_GPU_KERNELS(type)                                     \
-  REGISTER_KERNEL_BUILDER(                                                    \
+  REGISTER_KERNEL_BUILDER(                                                   \
       Name("Addons>Lisht").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
       LishtOp<GPUDevice, type>);                                             \
-  REGISTER_KERNEL_BUILDER(                                                    \
+  REGISTER_KERNEL_BUILDER(                                                   \
       Name("Addons>LishtGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
       LishtGradOp<GPUDevice, type>);

 TF_CALL_GPU_NUMBER_TYPES(REGISTER_LISHT_GPU_KERNELS);
 #undef REGISTER_LISHT_GPU_KERNELS
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index e73254f97e..c40664dee8 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -50,8 +50,8 @@ struct LishtGrad {
   void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
                   typename TTypes<T>::ConstTensor features,
                   typename TTypes<T>::Tensor backprops) {
-      const auto g = features.tanh();
-      backprops.device(d) = gradients * (features + g - features * g.square());
+    const auto g = features.tanh();
+    backprops.device(d) = gradients * (features + g - features * g.square());
   }
 };

@@ -76,8 +76,7 @@ class LishtGradOp : public BinaryElementWiseOp<T, LishtGradOp<Device, T>> {
  public:
   explicit LishtGradOp(OpKernelConstruction* context)
       : BinaryElementWiseOp<T, LishtGradOp<Device, T>>::BinaryElementWiseOp(
-            context) {
-  }
+            context) {}

   void OperateNoTemplate(OpKernelContext* context, const Tensor& g,
                          const Tensor& a, Tensor* output);
@@ -91,8 +90,8 @@ class LishtGradOp : public BinaryElementWiseOp<T, LishtGradOp<Device, T>> {

 template <typename Device, typename T>
 void LishtGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
-                                        const Tensor& g, const Tensor& a,
-                                        Tensor* output) {
+                                               const Tensor& g, const Tensor& a,
+                                               Tensor* output) {
   functor::LishtGrad<Device, T> functor;
   functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(),
           output->flat<T>());
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
index c8646a336e..c356553157 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
@@ -26,7 +26,7 @@ namespace addons {

 using GPUDevice = Eigen::GpuDevice;

-#define DEFINE_GPU_KERNELS(T)                    \
+#define DEFINE_GPU_KERNELS(T)                   \
   template struct functor::Lisht<GPUDevice, T>; \
   template struct functor::LishtGrad<GPUDevice, T>;

 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);

From 42f5275efe621f35bcacbe4ec481369b6db65f40 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Fri, 20 Sep 2019 09:28:29 +0800
Subject: [PATCH 04/12] fix tolerance

---
 tensorflow_addons/activations/lisht_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow_addons/activations/lisht_test.py b/tensorflow_addons/activations/lisht_test.py
index ca337ef569..72379d2334 100644
--- a/tensorflow_addons/activations/lisht_test.py
+++ b/tensorflow_addons/activations/lisht_test.py
@@ -61,7 +61,8 @@ def test_theoretical_gradients(self, dtype):
         x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype)

         theoretical, numerical = tf.test.compute_gradient(lisht, [x])
-        self.assertAllCloseAccordingToType(theoretical, numerical, atol=1e-4)
+        self.assertAllCloseAccordingToType(
+            theoretical, numerical, rtol=5e-4, atol=5e-4)

From 31dd728371d64999724e09f2ff632ea76e74fecb Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 14:36:34 +0800
Subject: [PATCH 05/12] reorder the computation

---
 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index c40664dee8..845dca62cf 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -51,7 +51,7 @@ struct LishtGrad {
                   typename TTypes<T>::ConstTensor features,
                   typename TTypes<T>::Tensor backprops) {
     const auto g = features.tanh();
-    backprops.device(d) = gradients * (features + g - features * g.square());
+    backprops.device(d) = gradients * (g + features * (static_cast<T>(1.0) - g.square()));
   }
 };

From 360a7558ee90cdd322c2e5409e427d52fa634c84 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 14:37:14 +0800
Subject: [PATCH 06/12] unify namespace

---
 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc | 2 +-
 .../custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
index 686b5adf70..05d56a043f 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
@@ -75,5 +75,5 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_LISHT_GPU_KERNELS);

 #endif  // GOOGLE_CUDA

-}  // end namespace addons
+}  // namespace addons
 }  // namespace tensorflow
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
index c356553157..66e0f979a1 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
@@ -32,7 +32,7 @@ using GPUDevice = Eigen::GpuDevice;

 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);

-}  // end namespace addons
+}  // namespace addons
 }  // namespace tensorflow

 #endif  // GOOGLE_CUDA

From ca891a9e29cfdc4edb1b059c76f76ab548bfc37f Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 14:37:44 +0800
Subject: [PATCH 07/12] clean up testcase

---
 tensorflow_addons/activations/lisht_test.py | 32 +++++----------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/tensorflow_addons/activations/lisht_test.py b/tensorflow_addons/activations/lisht_test.py
index 72379d2334..b4e7fd2dfc 100644
--- a/tensorflow_addons/activations/lisht_test.py
+++ b/tensorflow_addons/activations/lisht_test.py
@@ -25,40 +25,23 @@
 from tensorflow_addons.utils import test_utils


-def _ref_lisht(x):
-    x = tf.convert_to_tensor(x)
-    return x * tf.tanh(x)
-
-
 @test_utils.run_all_in_graph_and_eager_modes
 class LishtTest(tf.test.TestCase, parameterized.TestCase):
     @parameterized.named_parameters(("float16", np.float16),
                                     ("float32", np.float32),
                                     ("float64", np.float64))
     def test_lisht(self, dtype):
-        x = (np.random.rand(2, 3, 4) * 5.0 - 2.5).astype(dtype)
-        self.assertAllCloseAccordingToType(lisht(x), _ref_lisht(x))
-
-    @parameterized.named_parameters(("float16", np.float16),
-                                    ("float32", np.float32),
-                                    ("float64", np.float64))
-    def test_gradients(self, dtype):
-        x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype)
-
-        with tf.GradientTape(persistent=True) as tape:
-            tape.watch(x)
-            y_ref = _ref_lisht(x)
-            y = lisht(x)
-        grad_ref = tape.gradient(y_ref, x)
-        grad = tape.gradient(y, x)
-        self.assertAllCloseAccordingToType(grad, grad_ref)
+        x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
+        expected_result = tf.constant(
+            [1.9280552, 0.7615942, 0.0, 0.7615942, 1.9280552], dtype=dtype)
+        self.assertAllCloseAccordingToType(lisht(x), expected_result)

     @parameterized.named_parameters(("float32", np.float32),
                                     ("float64", np.float64))
     def test_theoretical_gradients(self, dtype):
         # Only test theoretical gradients for float32 and float64
         # because of the instability of float16 while computing jacobian
-        x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype)
+        x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)

         theoretical, numerical = tf.test.compute_gradient(lisht, [x])
         self.assertAllCloseAccordingToType(
@@ -73,10 +56,9 @@ def test_unknown_shape(self):
             self.assertAllClose(fn(x), lisht(x))

     def test_serialization(self):
-        ref_fn = lisht
-        config = tf.keras.activations.serialize(ref_fn)
+        config = tf.keras.activations.serialize(lisht)
         fn = tf.keras.activations.deserialize(config)
-        self.assertEqual(fn, ref_fn)
+        self.assertEqual(fn, lisht)

     def test_serialization_with_layers(self):
         layer = tf.keras.layers.Dense(3, activation=lisht)

From 1bdcc00afd725479475b0d30a84dc402b599a20d Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 14:55:36 +0800
Subject: [PATCH 08/12] format code

---
 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index 845dca62cf..50d4f68146 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -51,7 +51,8 @@ struct LishtGrad {
                   typename TTypes<T>::ConstTensor features,
                   typename TTypes<T>::Tensor backprops) {
     const auto g = features.tanh();
-    backprops.device(d) = gradients * (g + features * (static_cast<T>(1.0) - g.square()));
+    backprops.device(d) =
+        gradients * (g + features * (static_cast<T>(1.0) - g.square()));
   }
 };

From 98044d2e852627f95ff12440432225ec3094a88a Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 22:27:42 +0800
Subject: [PATCH 09/12] fix typo

---
 .../custom_ops/activations/cc/kernels/lisht_op.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index 50d4f68146..ecf6745afb 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -44,9 +44,9 @@ template <typename Device, typename T>
 struct LishtGrad {
   // Computes LishtGrad backprops.
   //
-  // gradients: gradients backpropagated to the List op.
-  // features: the inputs that were passed to the List op.
-  // backprops: gradients to backpropagate to the List inputs.
+  // gradients: gradients backpropagated to the Lisht op.
+  // features: the inputs that were passed to the Lisht op.
+  // backprops: gradients to backpropagate to the Lisht inputs.
   void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
                   typename TTypes<T>::ConstTensor features,
                   typename TTypes<T>::Tensor backprops) {

From 388a42eb503ca7f2d6cb3b54380f81de35329e45 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Wed, 25 Sep 2019 21:13:42 -0700
Subject: [PATCH 10/12] fix namespace comment

---
 tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
index 0cbb7d3870..1a5b1712e9 100644
--- a/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
@@ -33,5 +33,5 @@ REGISTER_OP("Addons>LishtGrad")
     .Attr("T: {half, float, double}")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn);

-}  // end namespace addons
+}  // namespace addons
 }  // namespace tensorflow

From 98c37536e49476231e6c6fadd7dd6983d9fc927e Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Wed, 25 Sep 2019 22:23:04 -0700
Subject: [PATCH 11/12] remove extra the

---
 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index ecf6745afb..a3b5e85ca0 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -45,7 +45,7 @@ struct LishtGrad {
   // Computes LishtGrad backprops.
   //
   // gradients: gradients backpropagated to the Lisht op.
-  // features: the inputs that were passed to the Lisht op.
+  // features: inputs that were passed to the Lisht op.
   // backprops: gradients to backpropagate to the Lisht inputs.

From bb674eb31e216799e380d46bb36e83cb5ef545a6 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Sun, 29 Sep 2019 11:43:18 -0700
Subject: [PATCH 12/12] change test size to small

---
 tensorflow_addons/activations/BUILD | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow_addons/activations/BUILD b/tensorflow_addons/activations/BUILD
index e1885a8c40..a1972f34c9 100644
--- a/tensorflow_addons/activations/BUILD
+++ b/tensorflow_addons/activations/BUILD
@@ -21,7 +21,7 @@ py_library(

 py_test(
     name = "sparsemax_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "sparsemax_test.py",
     ],
@@ -34,7 +34,7 @@ py_test(

 py_test(
     name = "gelu_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "gelu_test.py",
     ],
@@ -47,7 +47,7 @@ py_test(

 py_test(
     name = "hardshrink_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "hardshrink_test.py",
     ],
@@ -60,7 +60,7 @@ py_test(

 py_test(
     name = "lisht_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "lisht_test.py",
     ],
@@ -73,7 +73,7 @@ py_test(

 py_test(
     name = "tanhshrink_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "tanhshrink_test.py",
     ],
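As an end-to-end usage sketch (assuming tensorflow-addons is built with this patch series applied and the custom op library loads; the snippet is illustrative, not part of the patches):

import tensorflow as tf
from tensorflow_addons.activations import lisht

x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0])
print(lisht(x))  # ~[1.9280552, 0.7615942, 0.0, 0.7615942, 1.9280552]

# lisht is registered as a Keras custom object, so it serializes like a
# built-in activation.
layer = tf.keras.layers.Dense(3, activation=lisht)
restored = tf.keras.layers.deserialize(tf.keras.layers.serialize(layer))
assert restored.activation.__name__ == "lisht"

# Gradients flow through the registered Addons>LishtGrad kernel:
# tanh(x) + x * (1 - tanh(x)^2), elementwise.
with tf.GradientTape() as tape:
    tape.watch(x)
    y = lisht(x)
print(tape.gradient(y, x))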