
Softsign op #9851

Merged 4 commits on Feb 23, 2018
Changes from 3 commits
4 changes: 4 additions & 0 deletions src/operator/mshadow_op.h
@@ -111,6 +111,10 @@ MXNET_UNARY_MATH_OP(sigmoid, 1.0f / (1.0f + math::exp(-a)));

MXNET_UNARY_MATH_OP(sigmoid_grad, math::id(a) * (1.0f - math::id(a)));

MXNET_UNARY_MATH_OP(softsign, a / (1.0f + math::fabs(a)));

MXNET_UNARY_MATH_OP(softsign_grad, 1.0f / math::sqr(1.0f + math::fabs(a)));

MXNET_UNARY_MATH_OP_NC(relu, a > DType(0) ? a : DType(0));

MXNET_UNARY_MATH_OP_NC(relu_grad, a > DType(0) ? DType(1) : DType(0));
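For reference, a minimal NumPy sketch of what the two new kernels compute (names and values here are illustrative, not part of the PR):

import numpy as np

def softsign(a):
    # forward: a / (1 + |a|)
    return a / (1.0 + np.abs(a))

def softsign_grad(a):
    # derivative with respect to the input a: 1 / (1 + |a|)^2
    return 1.0 / np.square(1.0 + np.abs(a))

x = np.array([-2.0, 0.0, 2.0])
print(softsign(x))       # [-0.6667  0.      0.6667]
print(softsign_grad(x))  # [ 0.1111  1.      0.1111]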
11 changes: 10 additions & 1 deletion src/operator/nn/activation-inl.h
@@ -47,7 +47,7 @@ namespace activation {
enum ActivationOpInputs {kData};
enum ActivationOpOutputs {kOut};
enum ActivationOpResource {kTempSpace};
-enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU};
+enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU, kSoftSign};
} // activation

struct ActivationParam : public dmlc::Parameter<ActivationParam> {
@@ -59,6 +59,7 @@ struct ActivationParam : public dmlc::Parameter<ActivationParam> {
.add_enum("sigmoid", activation::kSigmoid)
.add_enum("tanh", activation::kTanh)
.add_enum("softrelu", activation::kSoftReLU)
.add_enum("softsign", activation::kSoftSign)
.describe("Activation function to be applied.");
}

@@ -140,6 +141,10 @@ void ActivationComputeImpl(const ActivationParam &param, const OpContext &ctx,
ActivationForward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
ctx, input, req, output);
break;
case activation::kSoftSign:
ActivationForward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>(
ctx, input, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
@@ -168,6 +173,10 @@ void ActivationGradComputeImpl(const ActivationParam &param, const OpContext &ctx,
ActivationBackward<xpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
case activation::kSoftSign:
Review comment (Member): Are you sure this is correct? ActivationGradComputeImpl takes out_grad and out_data to calculate output. In other words, for y = activation(x) it calculates dx = _backward_activation(dy, y), not dx = _backward_activation(dy, x).

ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>(
ctx, out_grad, out_data, req, output);
break;
default:
LOG(FATAL) << "unknown activation type";
}
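A quick NumPy check of the reviewer's point above (illustrative, not part of the PR): softsign_grad is written in terms of the input x, but ActivationBackward hands it the output y, which yields a different number.

import numpy as np

def softsign(x):
    return x / (1.0 + np.abs(x))

def softsign_grad(v):
    # correct only when v is the input x
    return 1.0 / np.square(1.0 + np.abs(v))

x = np.array([2.0])
y = softsign(x)          # 0.6667

print(softsign_grad(x))  # 0.1111 -- the true dy/dx
print(softsign_grad(y))  # 0.36   -- what this Activation path computes

Expressed in terms of the output, the correct gradient is (1 - |y|)^2, since 1/(1 + |x|) = 1 - |y|; that is the form an out_data-based backward would need (compare sigmoid_grad above, which is deliberately written as a * (1 - a) with a being the output).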
1 change: 1 addition & 0 deletions src/operator/nn/activation.cc
@@ -149,6 +149,7 @@ The following activation functions are supported:
- `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
- `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}`
- `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
- `softsign`: :math:`y = \frac{x}{1 + abs(x)}`

)code" ADD_FILELINE)
.set_attr_parser(ParamParser<ActivationParam>)
2 changes: 2 additions & 0 deletions src/operator/operator_tune.cc
@@ -213,6 +213,8 @@ IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::reciprocal); // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::reciprocal_grad); // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::sigmoid); // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::sigmoid_grad); // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::softsign); // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::softsign_grad); // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::relu); // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_BWD(mxnet::op::mshadow_op::relu_grad); // NOLINT()
IMPLEMENT_UNARY_WORKLOAD_FWD(mxnet::op::mshadow_op::tanh); // NOLINT()
17 changes: 17 additions & 0 deletions src/operator/tensor/elemwise_unary_op_basic.cc
@@ -106,6 +106,23 @@ The storage type of ``sigmoid`` output is always dense

MXNET_OPERATOR_REGISTER_BINARY_WITH_SPARSE_CPU(_backward_sigmoid,
unary_bwd<mshadow_op::sigmoid_grad>);
// softsign
MXNET_OPERATOR_REGISTER_UNARY(softsign)
MXNET_ADD_SPARSE_OP_ALIAS(softsign)
Review comment (Member): This operator doesn't have a sparse implementation, so the sparse op alias should not be registered.

.describe(R"code(Computes softsign of x element-wise.

.. math::
y = x / (1 + abs(x))

The storage type of ``softsign`` output is always dense

)code" ADD_FILELINE)
.set_attr<FCompute>("FCompute<cpu>", UnaryOp::Compute<cpu, mshadow_op::softsign>)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_softsign"});

MXNET_OPERATOR_REGISTER_BINARY(_backward_softsign)
.set_attr<FCompute>("FCompute<cpu>", ElemwiseBinaryOp::Compute<cpu,
unary_bwd<mshadow_op::softsign_grad> >);

// copy
static void CopyEx(const nnvm::NodeAttrs& attrs,
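A small usage sketch of the softsign operator registered above, assuming a recent MXNet build that includes this PR:

import mxnet as mx

x = mx.nd.array([-2.0, 0.0, 2.0])
print(mx.nd.softsign(x).asnumpy())  # [-0.6667  0.  0.6667]

# FGradient is registered as ElemwiseGradUseIn, so autograd routes the
# backward pass through _backward_softsign with the forward *input* x
x.attach_grad()
with mx.autograd.record():
    y = mx.nd.softsign(x)
y.backward()
print(x.grad.asnumpy())             # [0.1111  1.  0.1111]

Because ElemwiseGradUseIn passes the forward input rather than the output, this standalone path computes the gradient from x and is not affected by the out_data concern raised against the Activation path above.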
8 changes: 8 additions & 0 deletions src/operator/tensor/elemwise_unary_op_basic.cu
@@ -40,6 +40,14 @@ NNVM_REGISTER_OP(_backward_sigmoid)
.set_attr<FCompute>("FCompute<gpu>", ElemwiseBinaryOp::Compute<
gpu, unary_bwd<mshadow_op::sigmoid_grad>>);

// softsign
NNVM_REGISTER_OP(softsign)
.set_attr<FCompute>("FCompute<gpu>", UnaryOp::Compute<gpu, mshadow_op::softsign>);

NNVM_REGISTER_OP(_backward_softsign)
.set_attr<FCompute>("FCompute<gpu>", ElemwiseBinaryOp::Compute<
gpu, unary_bwd<mshadow_op::softsign_grad>>);

// copy
NNVM_REGISTER_OP(_copy)
.set_attr<FCompute>("FCompute<gpu>", UnaryOp::IdentityCompute<gpu>)
18 changes: 18 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -486,6 +486,20 @@ def fsigmoid(a):
check_symbolic_forward(y, [xa], [ya])
check_symbolic_backward(y, [xa], [np.ones(shape)], [ya * (1 - ya)])

def test_softsign():
Review comment (Member): Add @with_seed().

Review comment (Member): Is there a unit test for Activation(act_type='softsign')? (A sketch of such a test follows this function.)

def fsoftsign(a):
return np.divide(a, (1.0 + np.abs(a)))
def fsoftsign_grad(a):
return np.divide(1.0, np.square((1.0 + np.abs(a))))
shape = (3, 4)
x = mx.symbol.Variable("x")
y = mx.sym.softsign(x)
xa = np.random.uniform(low=-1.0, high=1.0, size=shape)
ya = fsoftsign(xa)
ya_grad = fsoftsign_grad(xa)
check_numeric_gradient(y, [xa], numeric_eps=1E-3)
check_symbolic_forward(y, [xa], [ya])
check_symbolic_backward(y, [xa], [np.ones(shape)], [ya_grad])
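A sketch of the Activation-based test the reviewer asks for (hypothetical, not in this PR; it reuses mx, np, and the test helpers already imported in this file):

@with_seed()
def test_softsign_activation():
    shape = (3, 4)
    x = mx.symbol.Variable("x")
    y = mx.sym.Activation(x, act_type='softsign')
    xa = np.random.uniform(low=-1.0, high=1.0, size=shape)
    ya = xa / (1.0 + np.abs(xa))
    ya_grad = 1.0 / np.square(1.0 + np.abs(xa))
    check_numeric_gradient(y, [xa], numeric_eps=1E-3)
    check_symbolic_forward(y, [xa], [ya])
    check_symbolic_backward(y, [xa], [np.ones(shape)], [ya_grad])

The backward checks here would likely expose the out_data/out_grad mismatch flagged in activation-inl.h, since the Activation path computes its gradient from the output rather than the input.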

@with_seed()
def test_binary_logic():
@@ -4814,6 +4828,10 @@ def test_unary_math_operators():
lambda x: 1. / (np.exp(-x) + 1.),
lambda x: 1. / (np.exp(-x) + 1.) / (np.exp(x) + 1.),
-3.0, 3.0],
'softsign': [lambda x: mx.sym.softsign(x),
lambda x: x / (1. + np.abs(x)),
lambda x: 1. / np.square(1. + np.abs(x)),
-3.0, 3.0],
'sin': [lambda x: mx.sym.sin(x),
lambda x: np.sin(x),
lambda x: np.cos(x),