This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

softsign activation function #9858

Closed
wants to merge 2 commits into from
Changes from 1 commit
3 changes: 2 additions & 1 deletion src/operator/activation-inl.h
@@ -42,7 +42,7 @@ namespace op {
namespace activation {
enum ActivationOpInputs {kData};
enum ActivationOpOutputs {kOut};
enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU};
enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU, kSoftSign};
} // activation

struct ActivationParam : public dmlc::Parameter<ActivationParam> {
@@ -54,6 +54,7 @@ struct ActivationParam : public dmlc::Parameter<ActivationParam> {
.add_enum("sigmoid", activation::kSigmoid)
.add_enum("tanh", activation::kTanh)
.add_enum("softrelu", activation::kSoftReLU)
.add_enum("softsign", activation::kSoftSign)
.describe("Activation function to be applied.");
}
};
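
For context, a minimal Python sketch of how the new value is selected from the front end, assuming a build that includes this change (the "softsign" string is mapped to activation::kSoftSign by the add_enum call above):

import mxnet as mx

# pick the new activation type through the existing Activation operator
x = mx.sym.Variable("x")
y = mx.sym.Activation(data=x, act_type="softsign")
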
4 changes: 4 additions & 0 deletions src/operator/activation.cc
@@ -63,6 +63,9 @@ Operator *CreateOp<cpu>(ActivationParam param, int dtype, const TShape& dshape)
case activation::kSoftReLU:
op = new ActivationOp<cpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>();
break;
case activation::kSoftSign:
op = new ActivationOp<cpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>();
break;
default:
LOG(FATAL) << "unknown activation type";
}
@@ -87,6 +90,7 @@ The following activation functions are supported:
- `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
- `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}`
- `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
- `softsign`: :math:`y = \frac{x}{1 + abs(x)}`

)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input array to activation function.")
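
The docstring formulas above translate directly into NumPy; a small reference sketch (hypothetical helper names) for spot-checking the operator's output:

import numpy as np

def sigmoid(x):  return 1.0 / (1.0 + np.exp(-x))
def tanh(x):     return np.tanh(x)
def softrelu(x): return np.log1p(np.exp(x))      # log(1 + exp(x))
def softsign(x): return x / (1.0 + np.abs(x))    # the new entry

x = np.linspace(-3.0, 3.0, 7)
print(softsign(x))   # bounded in (-1, 1), roughly linear around 0
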
3 changes: 3 additions & 0 deletions src/operator/activation.cu
@@ -54,6 +54,9 @@ Operator *CreateOp<gpu>(ActivationParam param, int dtype, const TShape& dshape)
case activation::kSigmoid:
op = new ActivationOp<gpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad, DType>();
break;
case activation::kSoftSign:
op = new ActivationOp<gpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>();
break;
case activation::kTanh:
op = new ActivationOp<gpu, mshadow_op::tanh, mshadow_op::tanh_grad, DType>();
break;
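
A quick imperative check of the GPU path, assuming a CUDA-enabled build of this branch with mx.gpu(0) available:

import mxnet as mx
import numpy as np

xa = np.random.uniform(-1.0, 1.0, (3, 4)).astype(np.float32)
x = mx.nd.array(xa, ctx=mx.gpu(0))               # requires a GPU build
y = mx.nd.Activation(x, act_type="softsign")     # dispatches to CreateOp<gpu> above
np.testing.assert_allclose(y.asnumpy(), xa / (1.0 + np.abs(xa)), rtol=1e-5, atol=1e-6)
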
13 changes: 13 additions & 0 deletions src/operator/mshadow_op.h
@@ -186,6 +186,19 @@ struct softrelu_grad {
return -DType(expm1f(-a));
}
};
/*! \brief softsign unit */
struct softsign {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a) {
return DType(a / (DType(1.0f) + fabsf(a)));
}
};
struct softsign_grad {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a) {
return DType(1.0f / powf((DType(1.0f) + fabsf(a)), 2.0f) );
}
};

struct exp {
template<typename DType>
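
The derivative implemented here is d/dx [x / (1 + |x|)] = 1 / (1 + |x|)^2; a quick NumPy finite-difference check of that formula:

import numpy as np

def softsign(x):
    return x / (1.0 + np.abs(x))

def softsign_grad(x):
    return 1.0 / (1.0 + np.abs(x)) ** 2

x = np.random.uniform(-2.0, 2.0, size=1000)
eps = 1e-6
numeric = (softsign(x + eps) - softsign(x - eps)) / (2.0 * eps)
np.testing.assert_allclose(numeric, softsign_grad(x), rtol=1e-4)

One detail worth double-checking: softrelu_grad above returns -expm1(-a) = 1 - exp(-a), which equals the softplus derivative only when a is the operator's output, whereas 1 / (1 + |a|)^2 is the softsign derivative with respect to its input; whichever argument the Activation backward actually passes to these functors, the two conventions differ here.
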
17 changes: 17 additions & 0 deletions src/operator/tensor/elemwise_unary_op.cc
@@ -62,6 +62,23 @@ MXNET_OPERATOR_REGISTER_BINARY(_backward_sigmoid)
BinaryLaunch<cpu, kernel_launch_op::sigmoid_grad>);


// softsign
MXNET_OPERATOR_REGISTER_UNARY(softsign)
.describe(R"code(Computes softsign of x element-wise.

.. math::
y = x / (1 + abs(x))

)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_softsign"})
.set_attr<FCompute>("FCompute<cpu>",
UnaryLaunch<cpu, kernel_launch_op::softsign>);


MXNET_OPERATOR_REGISTER_BINARY(_backward_softsign)
.set_attr<FCompute>("FCompute<cpu>",
BinaryLaunch<cpu, kernel_launch_op::softsign_grad>);

// copy
MXNET_OPERATOR_REGISTER_UNARY(_copy)
.MXNET_DESCRIBE("Returns a copy of the input.")
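
With the standalone operator and its FGradient registered above, the forward/backward pair can be exercised end to end from Python; a small sketch, assuming the mx.nd.softsign binding generated from this registration:

import mxnet as mx
import numpy as np

x = mx.nd.array(np.random.uniform(-1.0, 1.0, (3, 4)))
x.attach_grad()
with mx.autograd.record():
    y = mx.nd.softsign(x)   # forward: the FCompute<cpu> kernel above
y.backward()                # backward: routed through _backward_softsign
print(x.grad.asnumpy())
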
6 changes: 6 additions & 0 deletions src/operator/tensor/elemwise_unary_op.cu
@@ -38,6 +38,12 @@ NNVM_REGISTER_OP(sigmoid)
NNVM_REGISTER_OP(_backward_sigmoid)
.set_attr<FCompute>("FCompute<gpu>", BinaryLaunch<gpu, kernel_launch_op::sigmoid_grad>);

NNVM_REGISTER_OP(softsign)
.set_attr<FCompute>("FCompute<gpu>", UnaryLaunch<gpu, kernel_launch_op::softsign>);

NNVM_REGISTER_OP(_backward_softsign)
.set_attr<FCompute>("FCompute<gpu>", BinaryLaunch<gpu, kernel_launch_op::softsign_grad>);

// copy
NNVM_REGISTER_OP(_copy)
.set_attr<FCompute>("FCompute<gpu>", IdentityCompute<gpu>);
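
Since the CPU and GPU registrations point at the same kernel_launch_op kernels, a simple consistency check between the two devices (again assuming a CUDA build and the generated mx.nd.softsign binding):

import mxnet as mx
import numpy as np

xa = np.random.uniform(-1.0, 1.0, (3, 4)).astype(np.float32)
y_cpu = mx.nd.softsign(mx.nd.array(xa, ctx=mx.cpu())).asnumpy()
y_gpu = mx.nd.softsign(mx.nd.array(xa, ctx=mx.gpu(0))).asnumpy()
np.testing.assert_allclose(y_cpu, y_gpu, rtol=1e-5, atol=1e-6)
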
15 changes: 15 additions & 0 deletions src/operator/tensor/elemwise_unary_op.h
@@ -170,6 +170,21 @@ struct relu_grad {
out[i] = out_grad[i] * DType(in[i] > DType(0.0f) ? DType(1.0f) : DType(0.0f));
}
};
/*! \brief softsign unit */
struct softsign {
template<typename DType>
MSHADOW_XINLINE static void Map(int i, DType *out,
const DType *in) {
out[i] = DType(DType(in[i]) / (DType(1.0f) + fabsf(in[i])));
}
};
struct softsign_grad {
template<typename DType>
MSHADOW_XINLINE static void Map(int i, DType *out,
const DType *out_grad, const DType *in) {
out[i] = DType(DType(1.0f) / powf(DType(1.0f) + fabsf(in[i]), 2.0f));
}
};
} // namespace kernel_launch_op

#define MXNET_OPERATOR_REGISTER_UNARY(name) \
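
One detail worth flagging: relu_grad above multiplies by out_grad[i], while softsign_grad writes only the local derivative 1 / (1 + |in|)^2 and leaves out_grad unused. If the intent is the usual chain rule, the NumPy equivalent of the expected input gradient would be the sketch below:

import numpy as np

def softsign_backward(out_grad, x):
    # chain rule: dL/dx = dL/dy * dy/dx, with dy/dx = 1 / (1 + |x|)^2
    return out_grad / (1.0 + np.abs(x)) ** 2
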
14 changes: 14 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -398,6 +398,20 @@ def fsigmoid(a):
check_symbolic_forward(y, [xa], [ya])
check_symbolic_backward(y, [xa], [np.ones(shape)], [ya * (1 - ya)])

def test_softsign():
def fsoftsign(a):
return np.divide(a, (1.0 + np.abs(a)))
def fsoftsign_grad(a):
return np.divide(1.0, np.square((1.0 + np.abs(a))))
shape = (3, 4)
x = mx.symbol.Variable("x")
y = mx.sym.softsign(x)
xa = np.random.uniform(low=-1.0,high=1.0,size=shape)
ya = fsoftsign(xa)
ya_grad = fsoftsign_grad(xa)
check_symbolic_forward(y, [xa], [ya])
check_symbolic_backward(y, [xa], [np.zeros(shape)], [ya_grad])

def test_binary_logic():
def _inner_test(forward_gt, logic_sym, x_shape, y_shape, test_scalar=True):
x = mx.symbol.Variable("x")
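
Note that the sigmoid test above passes np.ones(shape) as the head gradient and expects ya * (1 - ya), while test_softsign passes np.zeros(shape) yet still expects the nonzero ya_grad, which is consistent with the softsign_grad kernel ignoring out_grad. Under the usual chain-rule convention the check would look like the hypothetical variant below (reusing the helpers imported at the top of test_operator.py and assuming the backward scales by out_grad):

def test_softsign_chain_rule():
    shape = (3, 4)
    x = mx.symbol.Variable("x")
    y = mx.sym.softsign(x)
    xa = np.random.uniform(low=-1.0, high=1.0, size=shape)
    ya = xa / (1.0 + np.abs(xa))
    ya_grad = 1.0 / np.square(1.0 + np.abs(xa))
    check_symbolic_forward(y, [xa], [ya])
    # head gradient of ones, expected input gradient = 1 * dy/dx
    check_symbolic_backward(y, [xa], [np.ones(shape)], [ya_grad])
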