This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

softsign activation function #9858

Closed
wants to merge 2 commits into from
Changes from 1 commit
3 changes: 2 additions & 1 deletion src/operator/activation-inl.h
@@ -42,7 +42,7 @@ namespace op {
namespace activation {
enum ActivationOpInputs {kData};
enum ActivationOpOutputs {kOut};
enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU};
enum ActivationOpType {kReLU, kSigmoid, kTanh, kSoftReLU, kSoftSign};
} // activation

struct ActivationParam : public dmlc::Parameter<ActivationParam> {
@@ -54,6 +54,7 @@ struct ActivationParam : public dmlc::Parameter<ActivationParam> {
.add_enum("sigmoid", activation::kSigmoid)
.add_enum("tanh", activation::kTanh)
.add_enum("softrelu", activation::kSoftReLU)
.add_enum("softsign", activation::kSoftSign)
.describe("Activation function to be applied.");
}
};
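
For context, a minimal Python sketch of how the new value is selected from the front end, assuming a build that includes this change (the "softsign" string is mapped to activation::kSoftSign by the add_enum call above):

import mxnet as mx

# pick the new activation type through the existing Activation operator
x = mx.sym.Variable("x")
y = mx.sym.Activation(data=x, act_type="softsign")
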
4 changes: 4 additions & 0 deletions src/operator/activation.cc
@@ -63,6 +63,9 @@ Operator *CreateOp<cpu>(ActivationParam param, int dtype, const TShape& dshape)
case activation::kSoftReLU:
op = new ActivationOp<cpu, mshadow_op::softrelu, mshadow_op::softrelu_grad, DType>();
break;
case activation::kSoftSign:
op = new ActivationOp<cpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>();
break;
default:
LOG(FATAL) << "unknown activation type";
}
@@ -87,6 +90,7 @@ The following activation functions are supported:
- `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
- `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}`
- `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
- `softsign`: :math:`y = \frac{x}{1 + abs(x)}`

)code" ADD_FILELINE)
.add_argument("data", "NDArray-or-Symbol", "Input array to activation function.")
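
The docstring formulas above translate directly into NumPy; a small reference sketch (hypothetical helper names) for spot-checking the operator's output:

import numpy as np

def sigmoid(x):  return 1.0 / (1.0 + np.exp(-x))
def tanh(x):     return np.tanh(x)
def softrelu(x): return np.log1p(np.exp(x))      # log(1 + exp(x))
def softsign(x): return x / (1.0 + np.abs(x))    # the new entry

x = np.linspace(-3.0, 3.0, 7)
print(softsign(x))   # bounded in (-1, 1), roughly linear around 0
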
3 changes: 3 additions & 0 deletions src/operator/activation.cu
@@ -54,6 +54,9 @@ Operator *CreateOp<gpu>(ActivationParam param, int dtype, const TShape& dshape)
case activation::kSigmoid:
op = new ActivationOp<gpu, mshadow_op::sigmoid, mshadow_op::sigmoid_grad, DType>();
break;
case activation::kSoftSign:
op = new ActivationOp<gpu, mshadow_op::softsign, mshadow_op::softsign_grad, DType>();
break;
case activation::kTanh:
op = new ActivationOp<gpu, mshadow_op::tanh, mshadow_op::tanh_grad, DType>();
break;
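
A quick imperative check of the GPU path, assuming a CUDA-enabled build of this branch with mx.gpu(0) available:

import mxnet as mx
import numpy as np

xa = np.random.uniform(-1.0, 1.0, (3, 4)).astype(np.float32)
x = mx.nd.array(xa, ctx=mx.gpu(0))               # requires a GPU build
y = mx.nd.Activation(x, act_type="softsign")     # dispatches to CreateOp<gpu> above
np.testing.assert_allclose(y.asnumpy(), xa / (1.0 + np.abs(xa)), rtol=1e-5, atol=1e-6)
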
13 changes: 13 additions & 0 deletions src/operator/mshadow_op.h
@@ -186,6 +186,19 @@ struct softrelu_grad {
return -DType(expm1f(-a));
}
};
/*! \brief softsign unit */
struct softsign {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a) {
return DType(a / (DType(1.0f) + fabsf(a)));
}
};
struct softsign_grad {
template<typename DType>
MSHADOW_XINLINE static DType Map(DType a) {
return DType(1.0f / powf((DType(1.0f) + fabsf(a)), 2.0f) );
}
};

struct exp {
template<typename DType>
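
The derivative implemented here is d/dx [x / (1 + |x|)] = 1 / (1 + |x|)^2; a quick NumPy finite-difference check of that formula:

import numpy as np

def softsign(x):
    return x / (1.0 + np.abs(x))

def softsign_grad(x):
    return 1.0 / (1.0 + np.abs(x)) ** 2

x = np.random.uniform(-2.0, 2.0, size=1000)
eps = 1e-6
numeric = (softsign(x + eps) - softsign(x - eps)) / (2.0 * eps)
np.testing.assert_allclose(numeric, softsign_grad(x), rtol=1e-4)

One detail worth double-checking: softrelu_grad above returns -expm1(-a) = 1 - exp(-a), which equals the softplus derivative only when a is the operator's output, whereas 1 / (1 + |a|)^2 is the softsign derivative with respect to its input; whichever argument the Activation backward actually passes to these functors, the two conventions differ here.
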
17 changes: 17 additions & 0 deletions src/operator/tensor/elemwise_unary_op.cc
@@ -62,6 +62,23 @@ MXNET_OPERATOR_REGISTER_BINARY(_backward_sigmoid)
BinaryLaunch<cpu, kernel_launch_op::sigmoid_grad>);


// softsign
MXNET_OPERATOR_REGISTER_UNARY(softsign)
.describe(R"code(Computes softsign of x element-wise.

.. math::
y = x / (1 + abs(x))

)code" ADD_FILELINE)
.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_softsign"})
.set_attr<FCompute>("FCompute<cpu>",
UnaryLaunch<cpu, kernel_launch_op::softsign>);


MXNET_OPERATOR_REGISTER_BINARY(_backward_softsign)
.set_attr<FCompute>("FCompute<cpu>",
BinaryLaunch<cpu, kernel_launch_op::softsign_grad>);

// copy
MXNET_OPERATOR_REGISTER_UNARY(_copy)
.MXNET_DESCRIBE("Returns a copy of the input.")
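
With the standalone operator and its FGradient registered above, the forward/backward pair can be exercised end to end from Python; a small sketch, assuming the mx.nd.softsign binding generated from this registration:

import mxnet as mx
import numpy as np

x = mx.nd.array(np.random.uniform(-1.0, 1.0, (3, 4)))
x.attach_grad()
with mx.autograd.record():
    y = mx.nd.softsign(x)   # forward: the FCompute<cpu> kernel above
y.backward()                # backward: routed through _backward_softsign
print(x.grad.asnumpy())
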
6 changes: 6 additions & 0 deletions src/operator/tensor/elemwise_unary_op.cu
@@ -38,6 +38,12 @@ NNVM_REGISTER_OP(sigmoid)
NNVM_REGISTER_OP(_backward_sigmoid)
.set_attr<FCompute>("FCompute<gpu>", BinaryLaunch<gpu, kernel_launch_op::sigmoid_grad>);

NNVM_REGISTER_OP(softsign)
.set_attr<FCompute>("FCompute<gpu>", UnaryLaunch<gpu, kernel_launch_op::softsign>);

NNVM_REGISTER_OP(_backward_softsign)
.set_attr<FCompute>("FCompute<gpu>", BinaryLaunch<gpu, kernel_launch_op::softsign_grad>);

// copy
NNVM_REGISTER_OP(_copy)
.set_attr<FCompute>("FCompute<gpu>", IdentityCompute<gpu>);
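
Since the CPU and GPU registrations point at the same kernel_launch_op kernels, a simple consistency check between the two devices (again assuming a CUDA build and the generated mx.nd.softsign binding):

import mxnet as mx
import numpy as np

xa = np.random.uniform(-1.0, 1.0, (3, 4)).astype(np.float32)
y_cpu = mx.nd.softsign(mx.nd.array(xa, ctx=mx.cpu())).asnumpy()
y_gpu = mx.nd.softsign(mx.nd.array(xa, ctx=mx.gpu(0))).asnumpy()
np.testing.assert_allclose(y_cpu, y_gpu, rtol=1e-5, atol=1e-6)
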
15 changes: 15 additions & 0 deletions src/operator/tensor/elemwise_unary_op.h
@@ -170,6 +170,21 @@ struct relu_grad {
out[i] = out_grad[i] * DType(in[i] > DType(0.0f) ? DType(1.0f) : DType(0.0f));
}
};
/*! \brief softsign unit */
struct softsign {
template<typename DType>
MSHADOW_XINLINE static void Map(int i, DType *out,
const DType *in) {
out[i] = DType(DType(in[i]) / (DType(1.0f) + fabsf(in[i])));
}
};
struct softsign_grad {
template<typename DType>
MSHADOW_XINLINE static void Map(int i, DType *out,
const DType *out_grad, const DType *in) {
out[i] = DType(DType(1.0f) / powf(DType(1.0f) + fabsf(in[i]), 2.0f));
}
};
} // namespace kernel_launch_op

#define MXNET_OPERATOR_REGISTER_UNARY(name) \
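
One detail worth flagging: relu_grad above multiplies by out_grad[i], while softsign_grad writes only the local derivative 1 / (1 + |in|)^2 and leaves out_grad unused. If the intent is the usual chain rule, the NumPy equivalent of the expected input gradient would be the sketch below:

import numpy as np

def softsign_backward(out_grad, x):
    # chain rule: dL/dx = dL/dy * dy/dx, with dy/dx = 1 / (1 + |x|)^2
    return out_grad / (1.0 + np.abs(x)) ** 2
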
14 changes: 14 additions & 0 deletions tests/python/unittest/test_operator.py
@@ -398,6 +398,20 @@ def fsigmoid(a):
check_symbolic_forward(y, [xa], [ya])
check_symbolic_backward(y, [xa], [np.ones(shape)], [ya * (1 - ya)])

def test_softsign():
def fsoftsign(a):
return np.divide(a, (1.0 + np.abs(a)))
def fsoftsign_grad(a):
return np.divide(1.0, np.square((1.0 + np.abs(a))))
shape = (3, 4)
x = mx.symbol.Variable("x")
y = mx.sym.softsign(x)
xa = np.random.uniform(low=-1.0,high=1.0,size=shape)
ya = fsoftsign(xa)
ya_grad = fsoftsign_grad(xa)
check_symbolic_forward(y, [xa], [ya])
check_symbolic_backward(y, [xa], [np.zeros(shape)], [ya_grad])

def test_binary_logic():
def _inner_test(forward_gt, logic_sym, x_shape, y_shape, test_scalar=True):
x = mx.symbol.Variable("x")
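
Note that the sigmoid test above passes np.ones(shape) as the head gradient and expects ya * (1 - ya), while test_softsign passes np.zeros(shape) yet still expects the nonzero ya_grad, which is consistent with the softsign_grad kernel ignoring out_grad. Under the usual chain-rule convention the check would look like the hypothetical variant below (reusing the helpers imported at the top of test_operator.py and assuming the backward scales by out_grad):

def test_softsign_chain_rule():
    shape = (3, 4)
    x = mx.symbol.Variable("x")
    y = mx.sym.softsign(x)
    xa = np.random.uniform(low=-1.0, high=1.0, size=shape)
    ya = xa / (1.0 + np.abs(xa))
    ya_grad = 1.0 / np.square(1.0 + np.abs(xa))
    check_symbolic_forward(y, [xa], [ya])
    # head gradient of ones, expected input gradient = 1 * dy/dx
    check_symbolic_backward(y, [xa], [np.ones(shape)], [ya_grad])
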