diff --git a/src/operator/mshadow_op.h b/src/operator/mshadow_op.h
index af7ef513f14e..1d4284e1ac2a 100644
--- a/src/operator/mshadow_op.h
+++ b/src/operator/mshadow_op.h
@@ -111,6 +111,10 @@ MXNET_UNARY_MATH_OP(sigmoid, 1.0f / (1.0f + math::exp(-a)));
 
 MXNET_UNARY_MATH_OP(sigmoid_grad, math::id(a) * (1.0f - math::id(a)));
 
+MXNET_UNARY_MATH_OP(softsign, a / (1.0f + math::fabs(a)));
+
+MXNET_UNARY_MATH_OP(softsign_grad, 1.0f / math::sqr(1.0f + math::fabs(a)));
+
 MXNET_UNARY_MATH_OP_NC(relu, a > DType(0) ? a : DType(0));
 
 MXNET_UNARY_MATH_OP_NC(relu_grad, a > DType(0) ? DType(1) : DType(0));
diff --git a/src/operator/nn/activation-inl.h b/src/operator/nn/activation-inl.h
index a440f97e1382..89a369c6717e 100644
--- a/src/operator/nn/activation-inl.h
+++ b/src/operator/nn/activation-inl.h
@@ -47,7 +47,7 @@ namespace activation {
 enum ActivationOpInputs {kData};
 enum ActivationOpOutputs {kOut};
 enum ActivationOpResource {kTempSpace};
-enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU};
+enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU, kSoftSign};
 }  // activation
 
 struct ActivationParam : public dmlc::Parameter<ActivationParam> {
@@ -59,6 +59,7 @@ struct ActivationParam : public dmlc::Parameter<ActivationParam> {
     .add_enum("sigmoid", activation::kSigmoid)
     .add_enum("tanh", activation::kTanh)
     .add_enum("softrelu", activation::kSoftReLU)
+    .add_enum("softsign", activation::kSoftSign)
     .describe("Activation function to be applied.");
   }
 
@@ -140,6 +141,10 @@ void ActivationComputeImpl(const ActivationParam &param, const OpContext &ctx,
       ActivationForward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
           ctx, input, req, output);
       break;
+    case activation::kSoftSign:
+      ActivationForward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>(
+          ctx, input, req, output);
+      break;
     default:
       LOG(FATAL) << "unknown activation type";
   }
@@ -168,6 +173,10 @@ void ActivationGradComputeImpl(const ActivationParam &param, const OpContext &ctx,
       ActivationBackward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
          ctx, out_grad, out_data, req, output);
       break;
+    case activation::kSoftSign:
+      ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>(
+         ctx, out_grad, out_data, req, output);
+      break;
     default:
       LOG(FATAL) << "unknown activation type";
   }
diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc
index 7b79a34a7b81..89059321b693 100644
--- a/src/operator/nn/activation.cc
+++ b/src/operator/nn/activation.cc
@@ -149,6 +149,7 @@ The following activation functions are supported:
 - `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
 - `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}`
 - `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
+- `softsign`: :math:`y = \frac{x}{1 + abs(x)}`
 
 )code" ADD_FILELINE)
 .set_attr_parser(ParamParser<ActivationParam>)
diff --git a/src/operator/operator_tune.cc b/src/operator/operator_tune.cc
index e0f8306565d9..c13f1ac2fae1 100644
--- a/src/operator/operator_tune.cc
+++ b/src/operator/operator_tune.cc
@@ -213,6 +213,8 @@ IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::reciprocal);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::reciprocal_grad);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::sigmoid);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::sigmoid_grad);  // NOLINT()
+IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::softsign);  // NOLINT()
+IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::softsign_grad);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::relu);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::relu_grad);  // NOLINT()
 IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::tanh);  // NOLINT()
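Note on the math encoded by the new mshadow ops above: softsign(x) = x / (1 + |x|) and its derivative is 1 / (1 + |x|)^2, which is exactly what `softsign_grad` computes. A minimal standalone NumPy sketch (not part of the patch, illustrative only) that checks this closed-form gradient against a central finite difference, the same idea the unit test's check_numeric_gradient relies on:

    # Standalone sanity check of the softsign gradient formula (not part of the patch).
    import numpy as np

    def softsign(a):
        return a / (1.0 + np.abs(a))

    def softsign_grad(a):
        return 1.0 / np.square(1.0 + np.abs(a))

    x = np.random.uniform(-1.0, 1.0, size=(3, 4))
    eps = 1e-6
    numeric = (softsign(x + eps) - softsign(x - eps)) / (2.0 * eps)
    assert np.allclose(numeric, softsign_grad(x), atol=1e-5)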
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc
index 95fd3bc7e3ae..acd8f7b23ff3 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -106,6 +106,23 @@ The storage type of ``sigmoid`` output is always dense
 
 MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_sigmoid,
                                                unary_bwd<mshadow_op::sigmoid_grad>);
 
+// softsign
+MXNET_OPERATOR_REGISTER_UNARY(softsign)
+MXNET_ADD_SPARSE_OP_ALIAS(softsign)
+.describe(R"code(Computes softsign of x element-wise.
+
+.. math::
+   y = x / (1 + abs(x))
+
+The storage type of ``softsign`` output is always dense
+
+)code" ADD_FILELINE)
+.set_attr<FCompute>("FCompute", UnaryOp::Compute<cpu, mshadow_op::softsign>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_softsign"});
+
+MXNET_OPERATOR_REGISTER_BINARY(_backward_softsign)
+.set_attr<FCompute>("FCompute", ElemwiseBinaryOp::Compute<cpu, unary_bwd<mshadow_op::softsign_grad> >);
+
 // copy
 static void CopyEx(const nnvm::NodeAttrs& attrs,
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cu b/src/operator/tensor/elemwise_unary_op_basic.cu
index 41eef903401c..8dfa9af74ce9 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cu
+++ b/src/operator/tensor/elemwise_unary_op_basic.cu
@@ -40,6 +40,14 @@ NNVM_REGISTER_OP(_backward_sigmoid)
 .set_attr<FCompute>("FCompute", ElemwiseBinaryOp::Compute<
   gpu, unary_bwd<mshadow_op::sigmoid_grad>>);
 
+// softsign
+NNVM_REGISTER_OP(softsign)
+.set_attr<FCompute>("FCompute", UnaryOp::Compute<gpu, mshadow_op::softsign>);
+
+NNVM_REGISTER_OP(_backward_softsign)
+.set_attr<FCompute>("FCompute", ElemwiseBinaryOp::Compute<
+  gpu, unary_bwd<mshadow_op::softsign_grad>>);
+
 // copy
 NNVM_REGISTER_OP(_copy)
 .set_attr<FCompute>("FCompute", UnaryOp::IdentityCompute<gpu>)
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 63b7c66855f7..7889e084f74d 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -486,6 +486,21 @@ def fsigmoid(a):
     check_symbolic_forward(y, [xa], [ya])
     check_symbolic_backward(y, [xa], [np.ones(shape)], [ya * (1 - ya)])
 
+@with_seed()
+def test_softsign():
+    def fsoftsign(a):
+        return np.divide(a, (1.0 + np.abs(a)))
+    def fsoftsign_grad(a):
+        return np.divide(1.0, np.square((1.0 + np.abs(a))))
+    shape = (3, 4)
+    x = mx.symbol.Variable("x")
+    y = mx.sym.softsign(x)
+    xa = np.random.uniform(low=-1.0, high=1.0, size=shape)
+    ya = fsoftsign(xa)
+    ya_grad = fsoftsign_grad(xa)
+    check_numeric_gradient(y, [xa], numeric_eps=1E-3)
+    check_symbolic_forward(y, [xa], [ya])
+    check_symbolic_backward(y, [xa], [np.ones(shape)], [ya_grad])
 
 @with_seed()
 def test_binary_logic():
@@ -4814,6 +4829,10 @@ def test_unary_math_operators():
                     lambda x: 1. / (np.exp(-x) + 1.),
                     lambda x: 1. / (np.exp(-x) + 1.) / (np.exp(x) + 1.),
                     -3.0, 3.0],
+        'softsign': [lambda x: mx.sym.softsign(x),
+                     lambda x: x / (1. + np.abs(x)),
+                     lambda x: 1. / np.square(1. + np.abs(x)),
+                     -3.0, 3.0],
         'sin': [lambda x: mx.sym.sin(x),
                 lambda x: np.sin(x),
                 lambda x: np.cos(x),
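For a quick interactive smoke test of the new operator (hypothetical usage, assuming an MXNet build that contains this patch: the registered `softsign` op should be picked up by the generated `mx.nd`/`mx.sym` front ends, and `Activation` gains the new `act_type`), the outputs should match the NumPy reference used in test_softsign:

    # Hypothetical smoke test; requires an MXNet build that includes this patch.
    import mxnet as mx
    import numpy as np

    x = np.random.uniform(-1.0, 1.0, size=(3, 4)).astype(np.float32)
    expected = x / (1.0 + np.abs(x))

    data = mx.nd.array(x)
    out_op = mx.nd.softsign(data)                            # standalone operator
    out_act = mx.nd.Activation(data, act_type='softsign')    # via the Activation op
    assert np.allclose(out_op.asnumpy(), expected, atol=1e-6)
    assert np.allclose(out_act.asnumpy(), expected, atol=1e-6)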