From 90197f872a19dbdade99b409d7e1e3c1b7ff4d08 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Fri, 20 Sep 2019 03:12:05 +0800
Subject: [PATCH 01/12] add lisht kernel

---
 tensorflow_addons/activations/BUILD           |  14 +++
 tensorflow_addons/activations/README.md       |   5 +-
 tensorflow_addons/activations/__init__.py     |   1 +
 tensorflow_addons/activations/lisht.py        |  49 ++++++++
 tensorflow_addons/activations/lisht_test.py   |  91 +++++++++++++++
 .../custom_ops/activations/BUILD              |  26 +++++
 .../activations/cc/kernels/lisht_op.cc        |  79 +++++++++++++
 .../activations/cc/kernels/lisht_op.h         | 106 ++++++++++++++++++
 .../activations/cc/kernels/lisht_op_gpu.cu.cc |  38 +++++++
 .../custom_ops/activations/cc/ops/lisht_op.cc |  37 ++++++
 10 files changed, 444 insertions(+), 2 deletions(-)
 create mode 100644 tensorflow_addons/activations/lisht.py
 create mode 100644 tensorflow_addons/activations/lisht_test.py
 create mode 100644 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
 create mode 100644 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
 create mode 100644 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
 create mode 100644 tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc

diff --git a/tensorflow_addons/activations/BUILD b/tensorflow_addons/activations/BUILD
index e5f9640bb4..e1885a8c40 100644
--- a/tensorflow_addons/activations/BUILD
+++ b/tensorflow_addons/activations/BUILD
@@ -8,6 +8,7 @@ py_library(
         "__init__.py",
         "gelu.py",
         "hardshrink.py",
+        "lisht.py",
         "sparsemax.py",
         "tanhshrink.py",
     ],
@@ -57,6 +58,19 @@ py_test(
     ],
 )

+py_test(
+    name = "lisht_test",
+    size = "medium",
+    srcs = [
+        "lisht_test.py",
+    ],
+    main = "lisht_test.py",
+    srcs_version = "PY2AND3",
+    deps = [
+        ":activations",
+    ],
+)
+
 py_test(
     name = "tanhshrink_test",
     size = "medium",
diff --git a/tensorflow_addons/activations/README.md b/tensorflow_addons/activations/README.md
index d34c12b962..6e525d24ec 100644
--- a/tensorflow_addons/activations/README.md
+++ b/tensorflow_addons/activations/README.md
@@ -13,8 +13,9 @@
 |:----------|:-----------|:---------------------------------|
 | gelu | gelu | https://arxiv.org/abs/1606.08415 |
 | hardshrink| hardshrink | |
-| sparsemax | Sparsemax | https://arxiv.org/abs/1602.02068 |
-| tanhshrink | Tanhshrink | |
+| lisht | lisht | https://arxiv.org/abs/1901.05894 |
+| sparsemax | sparsemax | https://arxiv.org/abs/1602.02068 |
+| tanhshrink | tanhshrink | |

 ## Contribution Guidelines
diff --git a/tensorflow_addons/activations/__init__.py b/tensorflow_addons/activations/__init__.py
index 4208b57817..313a78a1e3 100644
--- a/tensorflow_addons/activations/__init__.py
+++ b/tensorflow_addons/activations/__init__.py
@@ -20,5 +20,6 @@

 from tensorflow_addons.activations.gelu import gelu
 from tensorflow_addons.activations.hardshrink import hardshrink
+from tensorflow_addons.activations.lisht import lisht
 from tensorflow_addons.activations.sparsemax import sparsemax
 from tensorflow_addons.activations.tanhshrink import tanhshrink
diff --git a/tensorflow_addons/activations/lisht.py b/tensorflow_addons/activations/lisht.py
new file mode 100644
index 0000000000..cbef569792
--- /dev/null
+++ b/tensorflow_addons/activations/lisht.py
@@ -0,0 +1,49 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+from tensorflow_addons.utils import keras_utils
+from tensorflow_addons.utils.resource_loader import get_path_to_datafile
+
+_activation_ops_so = tf.load_op_library(
+    get_path_to_datafile("custom_ops/activations/_activation_ops.so"))
+
+
+@keras_utils.register_keras_custom_object
+@tf.function
+def lisht(x):
+    """LiSHT: Non-Parametric Linearly Scaled Hyperbolic Tangent Activation Function.
+
+    Computes linearly scaled hyperbolic tangent (LiSHT): `x * tanh(x)`.
+
+    See [LiSHT: Non-Parametric Linearly Scaled Hyperbolic Tangent Activation Function for Neural Networks](https://arxiv.org/abs/1901.05894).
+
+    Args:
+        x: A `Tensor`. Must be one of the following types:
+            `float16`, `float32`, `float64`.
+    Returns:
+        A `Tensor`. Has the same type as `x`.
+    """
+    x = tf.convert_to_tensor(x)
+    return _activation_ops_so.addons_lisht(x)
+
+
+@tf.RegisterGradient("Addons>Lisht")
+def _lisht_grad(op, grad):
+    return _activation_ops_so.addons_lisht_grad(grad, op.inputs[0])
diff --git a/tensorflow_addons/activations/lisht_test.py b/tensorflow_addons/activations/lisht_test.py
new file mode 100644
index 0000000000..c21fbafb13
--- /dev/null
+++ b/tensorflow_addons/activations/lisht_test.py
@@ -0,0 +1,91 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from absl.testing import parameterized + +import numpy as np +import tensorflow as tf +from tensorflow_addons.activations import lisht +from tensorflow_addons.utils import test_utils + + +def _ref_lisht(x): + x = tf.convert_to_tensor(x) + return x * tf.tanh(x) + + +@test_utils.run_all_in_graph_and_eager_modes +class LishtTest(tf.test.TestCase, parameterized.TestCase): + @parameterized.named_parameters(("float16", np.float16), + ("float32", np.float32), + ("float64", np.float64)) + def test_lisht(self, dtype): + x = (np.random.rand(2, 3, 4) * 5.0 - 2.5).astype(dtype) + self.assertAllCloseAccordingToType(lisht(x), _ref_lisht(x)) + + @parameterized.named_parameters(("float16", np.float16), + ("float32", np.float32), + ("float64", np.float64)) + def test_gradients(self, dtype): + x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype) + + with tf.GradientTape(persistent=True) as tape: + tape.watch(x) + y_ref = _ref_lisht(x) + y = lisht(x) + grad_ref = tape.gradient(y_ref, x) + grad = tape.gradient(y, x) + self.assertAllCloseAccordingToType(grad, grad_ref) + + @parameterized.named_parameters(("float32", np.float32), + ("float64", np.float64)) + def test_theoretical_gradients(self, dtype): + # Only test theoretical gradients for float32 and float64 + # because of the instability of float16 while computing jacobian + x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype) + + theoretical, numerical = tf.test.compute_gradient(lisht, [x]) + self.assertAllCloseAccordingToType( + theoretical, numerical, atol=1e-4) + + def test_unknown_shape(self): + fn = lisht.get_concrete_function( + tf.TensorSpec(shape=None, dtype=tf.float32)) + + for shape in [(1,), (1, 2), (1, 2, 3), (1, 2, 3, 4)]: + x = tf.ones(shape=shape, dtype=tf.float32) + self.assertAllClose(fn(x), lisht(x)) + + def test_serialization(self): + ref_fn = lisht + config = tf.keras.activations.serialize(ref_fn) + fn = tf.keras.activations.deserialize(config) + self.assertEqual(fn, ref_fn) + + def test_serialization_with_layers(self): + layer = tf.keras.layers.Dense(3, activation=lisht) + config = tf.keras.layers.serialize(layer) + deserialized_layer = tf.keras.layers.deserialize(config) + self.assertEqual(deserialized_layer.__class__.__name__, + layer.__class__.__name__) + self.assertEqual(deserialized_layer.activation.__name__, "lisht") + + +if __name__ == "__main__": + tf.test.main() diff --git a/tensorflow_addons/custom_ops/activations/BUILD b/tensorflow_addons/custom_ops/activations/BUILD index b61d7a6fa3..567b06250d 100644 --- a/tensorflow_addons/custom_ops/activations/BUILD +++ b/tensorflow_addons/custom_ops/activations/BUILD @@ -49,6 +49,28 @@ cc_library( alwayslink = 1, ) +cc_library( + name = "lisht_op_gpu", + srcs = [ + "cc/kernels/lisht_op.h", + "cc/kernels/lisht_op_gpu.cu.cc", + ], + copts = if_cuda_is_configured([ + "-DGOOGLE_CUDA=1", + "-x cuda", + "-nvcc_options=relaxed-constexpr", + "-nvcc_options=ftz=true", + ]), + deps = [ + "@local_config_tf//:libtensorflow_framework", + "@local_config_tf//:tf_header_lib", + ] + if_cuda_is_configured([ + "@local_config_cuda//cuda:cuda_libs", + "@local_config_cuda//cuda:cuda_headers", + ]), + alwayslink = 1, +) + cc_library( name = "tanhshrink_op_gpu", srcs = [ @@ -78,10 +100,13 @@ cc_binary( "cc/kernels/gelu_op.h", "cc/kernels/hardshrink_op.cc", 
"cc/kernels/hardshrink_op.h", + "cc/kernels/lisht_op.cc", + "cc/kernels/lisht_op.h", "cc/kernels/tanhshrink_op.cc", "cc/kernels/tanhshrink_op.h", "cc/ops/gelu_op.cc", "cc/ops/hardshrink_op.cc", + "cc/ops/lisht_op.cc", "cc/ops/tanhshrink_op.cc", ], copts = [ @@ -96,6 +121,7 @@ cc_binary( ] + if_cuda_is_configured([ ":gelu_op_gpu", ":hardshrink_op_gpu", + ":lisht_op_gpu", ":tanhshrink_op_gpu", ]), ) diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc new file mode 100644 index 0000000000..7c13283f0e --- /dev/null +++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc @@ -0,0 +1,79 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#define EIGEN_USE_THREADS + +#include "tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/register_types.h" +#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" + +namespace tensorflow { +namespace addons { + +using CPUDevice = Eigen::ThreadPoolDevice; + +#define REGISTER_LISHT_KERNELS(type) \ + REGISTER_KERNEL_BUILDER( \ + Name("Addons>Lisht").Device(DEVICE_CPU).TypeConstraint("T"), \ + LishtOp); \ + REGISTER_KERNEL_BUILDER( \ + Name("Addons>LishtGrad").Device(DEVICE_CPU).TypeConstraint("T"), \ + LishtGradOp); + +// Lisht only makes sense with floating points. +TF_CALL_GPU_NUMBER_TYPES(REGISTER_LISHT_KERNELS); +#undef REGISTER_LISHT_KERNELS + +#if GOOGLE_CUDA + +using GPUDevice = Eigen::GpuDevice; + +// Forward declarations of the functor specializations for GPU. +namespace functor { +#define DECLARE_GPU_SPEC(T) \ + template <> \ + void Lisht::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor features, \ + typename TTypes::Tensor activations); \ + extern template struct Lisht; \ + \ + template <> \ + void LishtGrad::operator()( \ + const GPUDevice& d, typename TTypes::ConstTensor gradients, \ + typename TTypes::ConstTensor features, \ + typename TTypes::Tensor backprops); \ + extern template struct LishtGrad; + +TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC); +#undef DECLARE_GPU_SPEC +} // namespace functor + +// Registration of the GPU implementations. 
+#define REGISTER_LISHT_GPU_KERNELS(type)                                     \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("Addons>Lisht").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
+      LishtOp<GPUDevice, type>);                                             \
+  REGISTER_KERNEL_BUILDER(                                                    \
+      Name("Addons>LishtGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
+      LishtGradOp<GPUDevice, type>);
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_LISHT_GPU_KERNELS);
+#undef REGISTER_LISHT_GPU_KERNELS
+
+#endif  // GOOGLE_CUDA
+
+}  // end namespace addons
+}  // namespace tensorflow
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
new file mode 100644
index 0000000000..e73254f97e
--- /dev/null
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -0,0 +1,106 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_ADDONS_ACTIVATIONS_KERNELS_LISHT_OP_H_
+#define TENSORFLOW_ADDONS_ACTIVATIONS_KERNELS_LISHT_OP_H_
+
+#define EIGEN_USE_THREADS
+
+#include "tensorflow/core/framework/numeric_op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+
+namespace tensorflow {
+namespace addons {
+namespace functor {
+
+// Functor used by LishtOp to do the computations.
+template <typename Device, typename T>
+struct Lisht {
+  // Computes Lisht activation.
+  //
+  // features: any shape.
+  // activations: same shape as "features".
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor features,
+                  typename TTypes<T>::Tensor activations) {
+    activations.device(d) = features * features.tanh();
+  }
+};
+
+// Functor used by LishtGradOp to do the computations.
+template <typename Device, typename T>
+struct LishtGrad {
+  // Computes LishtGrad backprops.
+  //
+  // gradients: gradients backpropagated to the List op.
+  // features: the inputs that were passed to the List op.
+  // backprops: gradients to backpropagate to the List inputs.
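+  //
+  // With g = tanh(x), the derivative of lisht(x) = x * tanh(x) is
+  // tanh(x) + x * (1 - tanh(x)^2), i.e. x + g - x * g^2
+  // (equivalently g + x * (1 - g^2)).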
+  void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
+                  typename TTypes<T>::ConstTensor features,
+                  typename TTypes<T>::Tensor backprops) {
+      const auto g = features.tanh();
+      backprops.device(d) = gradients * (features + g - features * g.square());
+  }
+};
+
+}  // namespace functor
+
+template <typename Device, typename T>
+class LishtOp : public UnaryElementWiseOp<T, LishtOp<Device, T>> {
+ public:
+  explicit LishtOp(OpKernelConstruction* context)
+      : UnaryElementWiseOp<T, LishtOp<Device, T>>::UnaryElementWiseOp(context) {
+  }
+
+  void Operate(OpKernelContext* context, const Tensor& input, Tensor* output) {
+    functor::Lisht<Device, T> functor;
+    functor(context->eigen_device<Device>(), input.flat<T>(),
+            output->flat<T>());
+  }
+};
+
+template <typename Device, typename T>
+class LishtGradOp : public BinaryElementWiseOp<T, LishtGradOp<Device, T>> {
+ public:
+  explicit LishtGradOp(OpKernelConstruction* context)
+      : BinaryElementWiseOp<T, LishtGradOp<Device, T>>::BinaryElementWiseOp(
+            context) {
+  }
+
+  void OperateNoTemplate(OpKernelContext* context, const Tensor& g,
+                         const Tensor& a, Tensor* output);
+
+  template <int NDIMS>
+  void Operate(OpKernelContext* context, const Tensor& g, const Tensor& a,
+               Tensor* output) {
+    OperateNoTemplate(context, g, a, output);
+  }
+};
+
+template <typename Device, typename T>
+void LishtGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
+                                        const Tensor& g, const Tensor& a,
+                                        Tensor* output) {
+  functor::LishtGrad<Device, T> functor;
+  functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(),
+          output->flat<T>());
+}
+
+}  // namespace addons
+}  // namespace tensorflow
+
+#undef EIGEN_USE_THREADS
+
+#endif  // TENSORFLOW_ADDONS_ACTIVATIONS_KERNELS_LISHT_OP_H_
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
new file mode 100644
index 0000000000..c8646a336e
--- /dev/null
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
@@ -0,0 +1,38 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#if GOOGLE_CUDA
+
+#define EIGEN_USE_GPU
+
+#include "tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h"
+#include "tensorflow/core/framework/register_types.h"
+#include "third_party/eigen3/Eigen/Core"
+
+namespace tensorflow {
+namespace addons {
+
+using GPUDevice = Eigen::GpuDevice;
+
+#define DEFINE_GPU_KERNELS(T)                    \
+  template struct functor::Lisht<GPUDevice, T>;  \
+  template struct functor::LishtGrad<GPUDevice, T>;
+
+TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);
+
+}  // end namespace addons
+}  // namespace tensorflow
+
+#endif  // GOOGLE_CUDA
diff --git a/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
new file mode 100644
index 0000000000..0cbb7d3870
--- /dev/null
+++ b/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
@@ -0,0 +1,37 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/framework/common_shape_fns.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/shape_inference.h"
+
+namespace tensorflow {
+namespace addons {
+
+REGISTER_OP("Addons>Lisht")
+    .Input("features: T")
+    .Output("activations: T")
+    .Attr("T: {half, float, double}")
+    .SetShapeFn(shape_inference::UnchangedShape);
+
+REGISTER_OP("Addons>LishtGrad")
+    .Input("gradients: T")
+    .Input("features: T")
+    .Output("backprops: T")
+    .Attr("T: {half, float, double}")
+    .SetShapeFn(shape_inference::MergeBothInputsShapeFn);
+
+}  // end namespace addons
+}  // namespace tensorflow
From 3f8e8c93a92d54dbc1664a6cd57d64bf714def32 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Fri, 20 Sep 2019 03:17:05 +0800
Subject: [PATCH 02/12] update README

---
 tensorflow_addons/activations/README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tensorflow_addons/activations/README.md b/tensorflow_addons/activations/README.md
index 6e525d24ec..ede5eb30fb 100644
--- a/tensorflow_addons/activations/README.md
+++ b/tensorflow_addons/activations/README.md
@@ -4,9 +4,10 @@
 | Submodule | Maintainers | Contact Info |
 |:----------|:--------------------------|:-----------------------------------------|
 | gelu | @AakashKumarNain @WindQAQ | aakashnain@outlook.com windqaq@gmail.com |
-| hardshrink| @WindQAQ | windqaq@gmail.com
+| hardshrink| @WindQAQ | windqaq@gmail.com |
+| lisht | @WindQAQ | windqaq@gmail.com |
 | sparsemax | @AndreasMadsen | amwwebdk+github@gmail.com |
-| tanhshrink | @fsx950223 | fsx950223@gmail.com |
+| tanhshrink| @fsx950223 | fsx950223@gmail.com |

 ## Contents
 | Submodule | Activation | Reference |
@@ -15,7 +16,7 @@
 | hardshrink| hardshrink | |
 | lisht | lisht | https://arxiv.org/abs/1901.05894 |
 | sparsemax | sparsemax | https://arxiv.org/abs/1602.02068 |
-| tanhshrink | tanhshrink | |
+| tanhshrink| tanhshrink | |

 ## Contribution Guidelines

@@ -23,7 +24,6 @@
 In order to conform with the current API standard, all activations
 must:
  * Be a `tf.function`.
- * Have the signature `fn(input, axis=-1, name=None)`.
  * [Register as a keras global object](https://github.com/tensorflow/addons/blob/master/tensorflow_addons/utils/python/keras_utils.py)
    so it can be serialized properly.
  * Add the addon to the `py_library` in this sub-package's BUILD file.
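For reference alongside the kernels above, a minimal pure-TensorFlow sketch of the same forward and backward computation (illustrative only, not part of the patch series; the names lisht_ref and lisht_grad_ref are hypothetical):

import tensorflow as tf


def lisht_ref(x):
    """Forward pass, mirroring functor::Lisht: x * tanh(x)."""
    x = tf.convert_to_tensor(x)
    return x * tf.tanh(x)


def lisht_grad_ref(dy, x):
    """Backward pass, mirroring functor::LishtGrad as written in patch 01."""
    # With g = tanh(x): d/dx [x * tanh(x)] = tanh(x) + x * (1 - tanh(x)^2)
    #                                      = x + g - x * g^2.
    g = tf.tanh(x)
    return dy * (x + g - x * tf.square(g))


x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0])
print(lisht_ref(x))   # ~[1.9280552, 0.7615942, 0.0, 0.7615942, 1.9280552]
print(lisht_grad_ref(tf.ones_like(x), x))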
From c1e599df64d76eb36d39b86e361900cf1cd9f2f9 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Fri, 20 Sep 2019 09:23:15 +0800
Subject: [PATCH 03/12] format code

---
 tensorflow_addons/activations/lisht_test.py      |  3 +--
 .../activations/cc/kernels/lisht_op.cc           | 18 +++++++++---------
 .../activations/cc/kernels/lisht_op.h            | 11 +++++------
 .../activations/cc/kernels/lisht_op_gpu.cu.cc    |  2 +-
 4 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/tensorflow_addons/activations/lisht_test.py b/tensorflow_addons/activations/lisht_test.py
index c21fbafb13..ca337ef569 100644
--- a/tensorflow_addons/activations/lisht_test.py
+++ b/tensorflow_addons/activations/lisht_test.py
@@ -61,8 +61,7 @@ def test_theoretical_gradients(self, dtype):
         x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype)

         theoretical, numerical = tf.test.compute_gradient(lisht, [x])
-        self.assertAllCloseAccordingToType(
-            theoretical, numerical, atol=1e-4)
+        self.assertAllCloseAccordingToType(theoretical, numerical, atol=1e-4)

     def test_unknown_shape(self):
         fn = lisht.get_concrete_function(
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
index 7c13283f0e..686b5adf70 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
@@ -26,10 +26,10 @@ namespace addons {
 using CPUDevice = Eigen::ThreadPoolDevice;

 #define REGISTER_LISHT_KERNELS(type)                                         \
-  REGISTER_KERNEL_BUILDER(                                                    \
+  REGISTER_KERNEL_BUILDER(                                                   \
       Name("Addons>Lisht").Device(DEVICE_CPU).TypeConstraint<type>("T"),     \
       LishtOp<CPUDevice, type>);                                             \
-  REGISTER_KERNEL_BUILDER(                                                    \
+  REGISTER_KERNEL_BUILDER(                                                   \
       Name("Addons>LishtGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"), \
       LishtGradOp<CPUDevice, type>);

@@ -45,15 +45,15 @@ using GPUDevice = Eigen::GpuDevice;
 namespace functor {
 #define DECLARE_GPU_SPEC(T)                                          \
   template <>                                                        \
-  void Lisht<GPUDevice, T>::operator()(                              \
+  void Lisht<GPUDevice, T>::operator()(                             \
       const GPUDevice& d, typename TTypes<T>::ConstTensor features,  \
-      typename TTypes<T>::Tensor activations);                       \
-  extern template struct Lisht<GPUDevice, T>;                        \
+      typename TTypes<T>::Tensor activations);                      \
+  extern template struct Lisht<GPUDevice, T>;                       \
                                                                      \
   template <>                                                        \
-  void LishtGrad<GPUDevice, T>::operator()(                          \
+  void LishtGrad<GPUDevice, T>::operator()(                         \
       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
-      typename TTypes<T>::ConstTensor features,                      \
+      typename TTypes<T>::ConstTensor features,                     \
       typename TTypes<T>::Tensor backprops);                         \
   extern template struct LishtGrad<GPUDevice, T>;

@@ -63,10 +63,10 @@ TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
 #undef DECLARE_GPU_SPEC
 }  // namespace functor

 // Registration of the GPU implementations.
 #define REGISTER_LISHT_GPU_KERNELS(type)                                     \
-  REGISTER_KERNEL_BUILDER(                                                    \
+  REGISTER_KERNEL_BUILDER(                                                   \
       Name("Addons>Lisht").Device(DEVICE_GPU).TypeConstraint<type>("T"),     \
       LishtOp<GPUDevice, type>);                                             \
-  REGISTER_KERNEL_BUILDER(                                                    \
+  REGISTER_KERNEL_BUILDER(                                                   \
       Name("Addons>LishtGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"), \
       LishtGradOp<GPUDevice, type>);

 TF_CALL_GPU_NUMBER_TYPES(REGISTER_LISHT_GPU_KERNELS);
 #undef REGISTER_LISHT_GPU_KERNELS
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index e73254f97e..c40664dee8 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -50,8 +50,8 @@ struct LishtGrad {
   void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
                   typename TTypes<T>::ConstTensor features,
                   typename TTypes<T>::Tensor backprops) {
-      const auto g = features.tanh();
-      backprops.device(d) = gradients * (features + g - features * g.square());
+    const auto g = features.tanh();
+    backprops.device(d) = gradients * (features + g - features * g.square());
   }
 };

@@ -76,8 +76,7 @@ class LishtGradOp : public BinaryElementWiseOp<T, LishtGradOp<Device, T>> {
  public:
   explicit LishtGradOp(OpKernelConstruction* context)
       : BinaryElementWiseOp<T, LishtGradOp<Device, T>>::BinaryElementWiseOp(
-            context) {
-  }
+            context) {}

   void OperateNoTemplate(OpKernelContext* context, const Tensor& g,
                          const Tensor& a, Tensor* output);
@@ -91,8 +90,8 @@ class LishtGradOp : public BinaryElementWiseOp<T, LishtGradOp<Device, T>> {

 template <typename Device, typename T>
 void LishtGradOp<Device, T>::OperateNoTemplate(OpKernelContext* context,
-                                        const Tensor& g, const Tensor& a,
-                                        Tensor* output) {
+                                               const Tensor& g, const Tensor& a,
+                                               Tensor* output) {
   functor::LishtGrad<Device, T> functor;
   functor(context->eigen_device<Device>(), g.flat<T>(), a.flat<T>(),
           output->flat<T>());
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
index c8646a336e..c356553157 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
@@ -26,7 +26,7 @@ namespace addons {

 using GPUDevice = Eigen::GpuDevice;

-#define DEFINE_GPU_KERNELS(T)                    \
+#define DEFINE_GPU_KERNELS(T)                   \
   template struct functor::Lisht<GPUDevice, T>; \
   template struct functor::LishtGrad<GPUDevice, T>;

 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);

From 42f5275efe621f35bcacbe4ec481369b6db65f40 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Fri, 20 Sep 2019 09:28:29 +0800
Subject: [PATCH 04/12] fix tolerance

---
 tensorflow_addons/activations/lisht_test.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow_addons/activations/lisht_test.py b/tensorflow_addons/activations/lisht_test.py
index ca337ef569..72379d2334 100644
--- a/tensorflow_addons/activations/lisht_test.py
+++ b/tensorflow_addons/activations/lisht_test.py
@@ -61,7 +61,8 @@ def test_theoretical_gradients(self, dtype):
         x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype)

         theoretical, numerical = tf.test.compute_gradient(lisht, [x])
-        self.assertAllCloseAccordingToType(theoretical, numerical, atol=1e-4)
+        self.assertAllCloseAccordingToType(
+            theoretical, numerical, rtol=5e-4, atol=5e-4)

From 31dd728371d64999724e09f2ff632ea76e74fecb Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 14:36:34 +0800
Subject: [PATCH 05/12] reorder the computation

---
 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index c40664dee8..845dca62cf 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -51,7 +51,7 @@ struct LishtGrad {
                   typename TTypes<T>::ConstTensor features,
                   typename TTypes<T>::Tensor backprops) {
     const auto g = features.tanh();
-    backprops.device(d) = gradients * (features + g - features * g.square());
+    backprops.device(d) = gradients * (g + features * (static_cast<T>(1.0) - g.square()));
   }
 };

From 360a7558ee90cdd322c2e5409e427d52fa634c84 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 14:37:14 +0800
Subject: [PATCH 06/12] unify namespace

---
 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc | 2 +-
 .../custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
index 686b5adf70..05d56a043f 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.cc
@@ -75,5 +75,5 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_LISHT_GPU_KERNELS);

 #endif  // GOOGLE_CUDA

-}  // end namespace addons
+}  // namespace addons
 }  // namespace tensorflow
diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
index c356553157..66e0f979a1 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op_gpu.cu.cc
@@ -32,7 +32,7 @@ using GPUDevice = Eigen::GpuDevice;

 TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS);

-}  // end namespace addons
+}  // namespace addons
 }  // namespace tensorflow

 #endif  // GOOGLE_CUDA

From ca891a9e29cfdc4edb1b059c76f76ab548bfc37f Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 14:37:44 +0800
Subject: [PATCH 07/12] clean up testcase

---
 tensorflow_addons/activations/lisht_test.py | 32 +++++----------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/tensorflow_addons/activations/lisht_test.py b/tensorflow_addons/activations/lisht_test.py
index 72379d2334..b4e7fd2dfc 100644
--- a/tensorflow_addons/activations/lisht_test.py
+++ b/tensorflow_addons/activations/lisht_test.py
@@ -25,40 +25,23 @@
 from tensorflow_addons.utils import test_utils


-def _ref_lisht(x):
-    x = tf.convert_to_tensor(x)
-    return x * tf.tanh(x)
-
-
 @test_utils.run_all_in_graph_and_eager_modes
 class LishtTest(tf.test.TestCase, parameterized.TestCase):
     @parameterized.named_parameters(("float16", np.float16),
                                     ("float32", np.float32),
                                     ("float64", np.float64))
     def test_lisht(self, dtype):
-        x = (np.random.rand(2, 3, 4) * 5.0 - 2.5).astype(dtype)
-        self.assertAllCloseAccordingToType(lisht(x), _ref_lisht(x))
-
-    @parameterized.named_parameters(("float16", np.float16),
-                                    ("float32", np.float32),
-                                    ("float64", np.float64))
-    def test_gradients(self, dtype):
-        x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype)
-
-        with tf.GradientTape(persistent=True) as tape:
-            tape.watch(x)
-            y_ref = _ref_lisht(x)
-            y = lisht(x)
-        grad_ref = tape.gradient(y_ref, x)
-        grad = tape.gradient(y, x)
-        self.assertAllCloseAccordingToType(grad, grad_ref)
+        x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)
+        expected_result = tf.constant(
+            [1.9280552, 0.7615942, 0.0, 0.7615942, 1.9280552], dtype=dtype)
+        self.assertAllCloseAccordingToType(lisht(x), expected_result)

     @parameterized.named_parameters(("float32", np.float32),
                                     ("float64", np.float64))
     def test_theoretical_gradients(self, dtype):
         # Only test theoretical gradients for float32 and float64
         # because of the instability of float16 while computing jacobian
-        x = tf.constant([-3.0, -2.0, -1.0, 0.0, 1.0, 2.0, 3.0], dtype=dtype)
+        x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0], dtype=dtype)

         theoretical, numerical = tf.test.compute_gradient(lisht, [x])
         self.assertAllCloseAccordingToType(
@@ -73,10 +56,9 @@ def test_unknown_shape(self):
             self.assertAllClose(fn(x), lisht(x))

     def test_serialization(self):
-        ref_fn = lisht
-        config = tf.keras.activations.serialize(ref_fn)
+        config = tf.keras.activations.serialize(lisht)
         fn = tf.keras.activations.deserialize(config)
-        self.assertEqual(fn, ref_fn)
+        self.assertEqual(fn, lisht)

     def test_serialization_with_layers(self):
         layer = tf.keras.layers.Dense(3, activation=lisht)

From 1bdcc00afd725479475b0d30a84dc402b599a20d Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 14:55:36 +0800
Subject: [PATCH 08/12] format code

---
 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index 845dca62cf..50d4f68146 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -51,7 +51,8 @@ struct LishtGrad {
                   typename TTypes<T>::ConstTensor features,
                   typename TTypes<T>::Tensor backprops) {
     const auto g = features.tanh();
-    backprops.device(d) = gradients * (g + features * (static_cast<T>(1.0) - g.square()));
+    backprops.device(d) =
+        gradients * (g + features * (static_cast<T>(1.0) - g.square()));
   }
 };

From 98044d2e852627f95ff12440432225ec3094a88a Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Mon, 23 Sep 2019 22:27:42 +0800
Subject: [PATCH 09/12] fix typo

---
 .../custom_ops/activations/cc/kernels/lisht_op.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index 50d4f68146..ecf6745afb 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -44,9 +44,9 @@ template <typename Device, typename T>
 struct LishtGrad {
   // Computes LishtGrad backprops.
   //
-  // gradients: gradients backpropagated to the List op.
-  // features: the inputs that were passed to the List op.
-  // backprops: gradients to backpropagate to the List inputs.
+  // gradients: gradients backpropagated to the Lisht op.
+  // features: the inputs that were passed to the Lisht op.
+  // backprops: gradients to backpropagate to the Lisht inputs.
   void operator()(const Device& d, typename TTypes<T>::ConstTensor gradients,
                   typename TTypes<T>::ConstTensor features,
                   typename TTypes<T>::Tensor backprops) {

From 388a42eb503ca7f2d6cb3b54380f81de35329e45 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Wed, 25 Sep 2019 21:13:42 -0700
Subject: [PATCH 10/12] fix namespace comment

---
 tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc b/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
index 0cbb7d3870..1a5b1712e9 100644
--- a/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
+++ b/tensorflow_addons/custom_ops/activations/cc/ops/lisht_op.cc
@@ -33,5 +33,5 @@ REGISTER_OP("Addons>LishtGrad")
     .Attr("T: {half, float, double}")
     .SetShapeFn(shape_inference::MergeBothInputsShapeFn);

-}  // end namespace addons
+}  // namespace addons
 }  // namespace tensorflow

From 98c37536e49476231e6c6fadd7dd6983d9fc927e Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Wed, 25 Sep 2019 22:23:04 -0700
Subject: [PATCH 11/12] remove extra the

---
 tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
index ecf6745afb..a3b5e85ca0 100644
--- a/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
+++ b/tensorflow_addons/custom_ops/activations/cc/kernels/lisht_op.h
@@ -45,7 +45,7 @@ struct LishtGrad {
   // Computes LishtGrad backprops.
   //
   // gradients: gradients backpropagated to the Lisht op.
-  // features: the inputs that were passed to the Lisht op.
+  // features: inputs that were passed to the Lisht op.
   // backprops: gradients to backpropagate to the Lisht inputs.

From bb674eb31e216799e380d46bb36e83cb5ef545a6 Mon Sep 17 00:00:00 2001
From: Tzu-Wei Sung <windqaq@gmail.com>
Date: Sun, 29 Sep 2019 11:43:18 -0700
Subject: [PATCH 12/12] change test size to small

---
 tensorflow_addons/activations/BUILD | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tensorflow_addons/activations/BUILD b/tensorflow_addons/activations/BUILD
index e1885a8c40..a1972f34c9 100644
--- a/tensorflow_addons/activations/BUILD
+++ b/tensorflow_addons/activations/BUILD
@@ -21,7 +21,7 @@ py_library(

 py_test(
     name = "sparsemax_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "sparsemax_test.py",
     ],
@@ -34,7 +34,7 @@ py_test(

 py_test(
     name = "gelu_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "gelu_test.py",
     ],
@@ -47,7 +47,7 @@ py_test(

 py_test(
     name = "hardshrink_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "hardshrink_test.py",
     ],
@@ -60,7 +60,7 @@ py_test(

 py_test(
     name = "lisht_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "lisht_test.py",
     ],
@@ -73,7 +73,7 @@ py_test(

 py_test(
     name = "tanhshrink_test",
-    size = "medium",
+    size = "small",
     srcs = [
         "tanhshrink_test.py",
     ],
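As an end-to-end usage sketch (assuming tensorflow-addons is built with this patch series applied and the custom op library loads; the snippet is illustrative, not part of the patches):

import tensorflow as tf
from tensorflow_addons.activations import lisht

x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0])
print(lisht(x))  # ~[1.9280552, 0.7615942, 0.0, 0.7615942, 1.9280552]

# lisht is registered as a Keras custom object, so it serializes like a
# built-in activation.
layer = tf.keras.layers.Dense(3, activation=lisht)
restored = tf.keras.layers.deserialize(tf.keras.layers.serialize(layer))
assert restored.activation.__name__ == "lisht"

# Gradients flow through the registered Addons>LishtGrad kernel:
# tanh(x) + x * (1 - tanh(x)^2), elementwise.
with tf.GradientTape() as tape:
    tape.watch(x)
    y = lisht(x)
print(tape.gradient(y, x))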