diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
index a1b6ff8b5bac..fc60e1a557cc 100644
--- a/python/mxnet/ndarray/numpy/_op.py
+++ b/python/mxnet/ndarray/numpy/_op.py
@@ -47,7 +47,7 @@
            'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'roll', 'rot90', 'einsum',
            'true_divide', 'nonzero', 'quantile', 'percentile', 'shares_memory', 'may_share_memory',
            'diff', 'ediff1d', 'resize', 'polyval', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite',
-           'where', 'bincount', 'pad', 'cumsum', 'diag', 'diagonal']
+           'where', 'bincount', 'rollaxis', 'pad', 'cumsum', 'diag', 'diagonal']
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -7938,6 +7938,38 @@ def cumsum(a, axis=None, dtype=None, out=None):
     return _api_internal.cumsum(a, axis, dtype, out)
 
 
+@set_module('mxnet.ndarray.numpy')
+def rollaxis(a, axis, start=0):
+    """Roll the specified axis backwards, until it lies in a given position.
+
+    Parameters
+    ----------
+    a : ndarray
+        Input array.
+    axis : int
+        The axis to roll backwards; the other axes keep their relative order.
+    start : int, optional
+        The axis is rolled until it lies before this position.
+        The default, 0, results in a "complete" roll.
+
+    Returns
+    -------
+    res : ndarray
+        Array holding the data of `a` with the requested axis rolled.
+
+    Examples
+    --------
+    >>> a = np.ones((3,4,5,6))
+    >>> np.rollaxis(a, 3, 1).shape
+    (3, 6, 4, 5)
+    >>> np.rollaxis(a, 2).shape
+    (5, 3, 4, 6)
+    >>> np.rollaxis(a, 1, 4).shape
+    (3, 5, 6, 4)
+    """
+    return _npi.rollaxis(a, axis, start)
+
+
 @set_module('mxnet.ndarray.numpy')
 def diag(v, k=0):
     """
diff --git a/python/mxnet/numpy/fallback.py b/python/mxnet/numpy/fallback.py
index 54df715b60c0..7dddb100f2fa 100644
--- a/python/mxnet/numpy/fallback.py
+++ b/python/mxnet/numpy/fallback.py
@@ -90,7 +90,6 @@
     'rate',
     'real',
     'result_type',
-    'rollaxis',
     'roots',
     'searchsorted',
     'select',
@@ -182,7 +181,6 @@
 rate = onp.rate
 real = onp.real
 result_type = onp.result_type
-rollaxis = onp.rollaxis
 roots = onp.roots
 searchsorted = onp.searchsorted
 select = onp.select
diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index 281a6f7cc3fc..70f668bba1a0 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -73,7 +73,7 @@
            'greater', 'less', 'greater_equal', 'less_equal', 'roll', 'rot90', 'einsum', 'true_divide', 'nonzero',
            'quantile', 'percentile', 'shares_memory', 'may_share_memory', 'diff', 'ediff1d', 'resize', 'matmul',
            'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite', 'polyval', 'where', 'bincount',
-           'pad', 'cumsum', 'diag', 'diagonal']
+           'pad', 'cumsum', 'rollaxis', 'diag', 'diagonal']
 
 __all__ += fallback.__all__
 
@@ -10101,7 +10101,42 @@ def cumsum(a, axis=None, dtype=None, out=None):
            [ 4,  9, 15]])
     """
     return _mx_nd_np.cumsum(a, axis=axis, dtype=dtype, out=out)
-# pylint: enable=redefined-outer-name
+
+
+# pylint: disable=redefined-outer-name
+@set_module('mxnet.numpy')
+def rollaxis(a, axis, start=0):
+    """
+    Roll the specified axis backwards, until it lies in a given position.
+
+    Parameters
+    ----------
+    a : ndarray
+        Input array.
+    axis : int
+        The axis to roll backwards. The positions of the other axes do not
+        change relative to one another.
+    start : int, optional
+        The axis is rolled until it lies before this position.
+        The default, 0, results in a "complete" roll.
+
+    Returns
+    -------
+    res : ndarray
+        Array holding the data of `a` with the requested axis moved to its
+        new position.
+
+    Examples
+    --------
+    >>> a = np.ones((3,4,5,6))
+    >>> np.rollaxis(a, 3, 1).shape
+    (3, 6, 4, 5)
+    >>> np.rollaxis(a, 2).shape
+    (5, 3, 4, 6)
+    >>> np.rollaxis(a, 1, 4).shape
+    (3, 5, 6, 4)
+    """
+    return _mx_nd_np.rollaxis(a, axis, start)
 
 
 @set_module('mxnet.numpy')
diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py
index a2a4cd9d3584..8e388500462d 100644
--- a/python/mxnet/symbol/numpy/_symbol.py
+++ b/python/mxnet/symbol/numpy/_symbol.py
@@ -53,7 +53,7 @@
            'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'roll', 'rot90', 'einsum',
            'true_divide', 'quantile', 'percentile', 'shares_memory', 'may_share_memory', 'diff', 'ediff1d',
            'resize', 'polyval', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite',
-           'where', 'bincount', 'pad', 'cumsum', 'diag', 'diagonal']
+           'where', 'bincount', 'pad', 'rollaxis', 'cumsum', 'diag', 'diagonal']
 
 
 @set_module('mxnet.symbol.numpy')
@@ -6968,6 +6968,41 @@ def cumsum(a, axis=None, dtype=None, out=None):
     return _npi.cumsum(a, axis=axis, dtype=dtype, out=out)
 
 
+@set_module('mxnet.symbol.numpy')
+def rollaxis(a, axis, start=0):
+    """
+    Roll the specified axis backwards, until it lies in a given position.
+
+    Parameters
+    ----------
+    a : _Symbol
+        Input array.
+    axis : int
+        The axis to roll backwards. The positions of the other axes do not
+        change relative to one another.
+    start : int, optional
+        The axis is rolled until it lies before this position.
+        The default, 0, results in a "complete" roll.
+
+    Returns
+    -------
+    res : _Symbol
+        Symbol representing `a` with the requested axis moved to its
+        new position.
+
+    Examples
+    --------
+    >>> a = np.ones((3,4,5,6))
+    >>> np.rollaxis(a, 3, 1).shape
+    (3, 6, 4, 5)
+    >>> np.rollaxis(a, 2).shape
+    (5, 3, 4, 6)
+    >>> np.rollaxis(a, 1, 4).shape
+    (3, 5, 6, 4)
+    """
+    return _npi.rollaxis(a, axis, start)
+
+
 @set_module('mxnet.symbol.numpy')
 def diag(v, k=0):
     """
diff --git a/src/operator/numpy/np_matrix_op-inl.h b/src/operator/numpy/np_matrix_op-inl.h
index 2e48596cee9c..ed6bf27ac0f9 100644
--- a/src/operator/numpy/np_matrix_op-inl.h
+++ b/src/operator/numpy/np_matrix_op-inl.h
@@ -517,6 +517,47 @@ void NumpyFlipForward(const nnvm::NodeAttrs& attrs,
   NumpyFlipForwardImpl<xpu>(ctx, inputs, outputs, stride_, trailing_, flip_index);
 }
 
+struct NumpyRollaxisParam : public dmlc::Parameter<NumpyRollaxisParam> {  // rollaxis arguments
+  int axis;   // axis to roll; no default is declared for it below
+  int start;  // position the axis is rolled in front of; declared default is 0
+  DMLC_DECLARE_PARAMETER(NumpyRollaxisParam) {
+    DMLC_DECLARE_FIELD(axis)
+    .describe("The axis to roll backwards. "
+    "The positions of the other axes do not change relative to one another.");
+    DMLC_DECLARE_FIELD(start)
+    .set_default(0)
+    .describe("The axis is rolled until it lies before this position. "
+              "The default, 0, results in a “complete” roll.");
+  }
+};
+
+// Builds the transpose permutation that realizes rollaxis(axis, start) on an
+// ndim-dimensional array: entry i of the result names the input dimension
+// that ends up at output position i. Negative `axis`/`start` are wrapped by
+// adding ndim; no range checking is performed here.
+inline mxnet::TShape NumpyRollaxisShapeImpl(int axis,
+                                            int start,
+                                            const int& ndim) {
+  axis = (axis < 0) ? axis + ndim : axis;
+  start = (start < 0) ? start + ndim : start;
+  // -1 marks a slot of the permutation that has not been assigned yet.
+  mxnet::TShape perm(ndim, -1);
+  // An axis lying left of `start` lands in slot start - 1, because taking it
+  // out of its old place shifts every later slot down by one.
+  const int target = (axis < start) ? start - 1 : start;
+  perm[target] = axis;
+  // Fill the remaining slots with the other axes in their original order.
+  int next_axis = 0;
+  for (int slot = 0; slot < perm.ndim(); ++slot) {
+    if (perm[slot] >= 0) continue;
+    if (next_axis == axis) {
+      ++next_axis;
+    }
+    perm[slot] = next_axis++;
+  }
+  return perm;
+}
+
 struct NumpyMoveaxisParam : public dmlc::Parameter<NumpyMoveaxisParam> {
   mxnet::TShape source;
   mxnet::TShape destination;
@@ -601,6 +642,63 @@ void NumpyMoveaxisCompute(const nnvm::NodeAttrs& attrs,
   })
 }
 
+// Forward pass of _npi_rollaxis: transposes inputs[0] into outputs[0].
+template<typename xpu>
+void NumpyRollaxisCompute(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const std::vector<TBlob>& inputs,
+                          const std::vector<OpReqType>& req,
+                          const std::vector<TBlob>& outputs) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(outputs.size(), 1U);
+  CHECK_EQ(req[0], kWriteTo) << "Rollaxis does not support inplace";
+  const NumpyRollaxisParam& args = nnvm::get<NumpyRollaxisParam>(attrs.parsed);
+  mxnet::TShape perm = NumpyRollaxisShapeImpl(args.axis, args.start, inputs[0].ndim());
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, Dtype, {
+    TransposeImpl<xpu>(ctx.run_ctx, inputs[0], outputs[0], perm);
+  })
+}
+
+// Backward pass of _npi_rollaxis. Undoing a rollaxis is itself a rollaxis, so
+// inputs[0] (presumably the output gradient -- confirm against the gradient
+// registration) is transposed with the inverse permutation, obtained by
+// rolling the axis from its forward destination back to its origin.
+template<typename xpu>
+void NumpyRollaxisBackward(const nnvm::NodeAttrs& attrs,
+                           const OpContext& ctx,
+                           const std::vector<TBlob>& inputs,
+                           const std::vector<OpReqType>& req,
+                           const std::vector<TBlob>& outputs) {
+  using namespace mshadow;
+  using namespace mshadow::expr;
+
+  const NumpyRollaxisParam& args = nnvm::get<NumpyRollaxisParam>(attrs.parsed);
+  const int ndim = inputs[0].ndim();
+
+  // Wrap negative forward parameters the same way the permutation helper does.
+  const int fwd_axis = (args.axis < 0) ? args.axis + ndim : args.axis;
+  const int fwd_start = (args.start < 0) ? args.start + ndim : args.start;
+
+  // Where the forward pass put the rolled axis:
+  //   fwd_axis <  fwd_start  ->  slot fwd_start - 1  (moved right or stayed)
+  //   fwd_axis >= fwd_start  ->  slot fwd_start      (moved left or stayed)
+  // The inverse rolls that slot back. Its `start` is fwd_axis in the first
+  // case and fwd_axis + 1 in the second, because the helper places an axis
+  // at start - 1 whenever it lies to the left of start.
+  const bool moved_right = fwd_axis < fwd_start;
+  const int inv_axis = moved_right ? fwd_start - 1 : fwd_start;
+  const int inv_start = moved_right ? fwd_axis : fwd_axis + 1;
+
+  // NOTE(review): `req` is not inspected here, whereas the forward pass
+  // insists on kWriteTo -- confirm gradient accumulation is never requested.
+  mxnet::TShape perm = NumpyRollaxisShapeImpl(inv_axis, inv_start, ndim);
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, Dtype, {
+    TransposeImpl<xpu>(ctx.run_ctx, inputs[0], outputs[0], perm);
+  })
+}
+
 struct NumpyRot90Param : public dmlc::Parameter<NumpyRot90Param> {
   int k;
   dmlc::optional<mxnet::TShape> axes;
diff --git a/src/operator/numpy/np_matrix_op.cc b/src/operator/numpy/np_matrix_op.cc
index 1c0a8a610a6e..9e2d91a31815 100644
--- a/src/operator/numpy/np_matrix_op.cc
+++ b/src/operator/numpy/np_matrix_op.cc
@@ -34,6 +34,7 @@ namespace op {
 DMLC_REGISTER_PARAMETER(NumpyTransposeParam);
 DMLC_REGISTER_PARAMETER(NumpyRollParam);
 DMLC_REGISTER_PARAMETER(NumpyMoveaxisParam);
+DMLC_REGISTER_PARAMETER(NumpyRollaxisParam);
 DMLC_REGISTER_PARAMETER(NumpyRot90Param);
 DMLC_REGISTER_PARAMETER(NumpyReshapeParam);
 DMLC_REGISTER_PARAMETER(NumpyXReshapeParam);
@@ -1190,6 +1191,69 @@ NNVM_REGISTER_OP(_npi_roll)
 .add_argument("data", "NDArray-or-Symbol", "Input ndarray")
 .add_arguments(NumpyRollParam::__FIELDS__());
 
+// Shape inference for _npi_rollaxis: permutes the input shape by axis/start.
+bool NumpyRollaxisShape(const nnvm::NodeAttrs& attrs,
+                        mxnet::ShapeVector *in_attrs,
+                        mxnet::ShapeVector *out_attrs) {
+  const NumpyRollaxisParam& param = nnvm::get<NumpyRollaxisParam>(attrs.parsed);
+  // check 1 input, 1 output
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+
+  mxnet::TShape& shp = (*in_attrs)[0];
+  // Rank not known yet: defer instead of checking against a placeholder.
+  if (!mxnet::ndim_is_known(shp)) {
+    return false;
+  }
+  const int ndim = shp.ndim();
+  // check transpose dimensions no more than 6
+  CHECK_LE(ndim, 6) << "Transpose support at most 6 dimensions";
+
+  // check axis and start range: axis in [-ndim, ndim), start in [-ndim, ndim]
+  CHECK_GE(param.axis, -ndim)
+  << "axis must be within the range of " << -ndim << " and " << ndim - 1;
+  CHECK_LT(param.axis, ndim)
+  << "axis must be within the range of " << -ndim << " and " << ndim - 1;
+  CHECK_GE(param.start, -ndim)
+  << "start must be within the range of " << -ndim << " and " << ndim;
+  CHECK_LE(param.start, ndim)
+  << "start must be within the range of " << -ndim << " and " << ndim;
+
+  // generate output shape: ret[i] is the extent of input dimension axes[i]
+  mxnet::TShape axes = NumpyRollaxisShapeImpl(param.axis, param.start, ndim);
+  mxnet::TShape ret(ndim, -1);
+  for (int i = 0; i < ndim; ++i) {
+    CHECK(axes[i] < static_cast<int64_t>(ndim));
+    ret[i] = shp[axes[i]];
+  }
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
+  return shape_is_known(ret);
+}
+
+NNVM_REGISTER_OP(_npi_rollaxis)  // forward rollaxis operator (CPU registration)
+.describe(R"code(Roll the specified axis backwards, 
+until it lies in a given position.)code" ADD_FILELINE)
+.set_num_inputs(1)   // single input, listed below as "data"
+.set_num_outputs(1)  // single output
+.set_attr_parser(ParamParser<NumpyRollaxisParam>)  // parses axis / start
+.set_attr<nnvm::FListInputNames>("FListInputNames",
+  [](const NodeAttrs& attrs) {
+    return std::vector<std::string>{"data"};
+  })
+.set_attr<mxnet::FInferShape>("FInferShape", NumpyRollaxisShape)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", NumpyRollaxisCompute<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_npi_rollaxis_backward"})
+.add_argument("data", "NDArray-or-Symbol", "Input ndarray")
+.add_arguments(NumpyRollaxisParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_npi_rollaxis_backward)  // gradient op named by _npi_rollaxis's FGradient
+.set_num_inputs(1)   // NOTE(review): presumably the output gradient -- confirm
+.set_num_outputs(1)  // NOTE(review): presumably the input gradient -- confirm
+.set_attr_parser(ParamParser<NumpyRollaxisParam>)  // same parameter struct as forward
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FCompute>("FCompute<cpu>", NumpyRollaxisBackward<cpu>);
+
 template<>
 void NumpyFlipForwardImpl<cpu>(const OpContext& ctx,
                                const std::vector<TBlob>& inputs,
diff --git a/src/operator/numpy/np_matrix_op.cu b/src/operator/numpy/np_matrix_op.cu
index c4b3290d58b7..4871629d98e5 100644
--- a/src/operator/numpy/np_matrix_op.cu
+++ b/src/operator/numpy/np_matrix_op.cu
@@ -112,6 +112,12 @@ NNVM_REGISTER_OP(_backward_npi_flip)
 NNVM_REGISTER_OP(_np_moveaxis)
 .set_attr<FCompute>("FCompute<gpu>", NumpyMoveaxisCompute<gpu>);
 
+NNVM_REGISTER_OP(_npi_rollaxis)  // attaches the GPU compute function to the forward op
+.set_attr<FCompute>("FCompute<gpu>", NumpyRollaxisCompute<gpu>);
+
+NNVM_REGISTER_OP(_npi_rollaxis_backward)  // attaches the GPU compute function to the backward op
+.set_attr<FCompute>("FCompute<gpu>", NumpyRollaxisBackward<gpu>);
+
 NNVM_REGISTER_OP(_npi_rot90)
 .set_attr<FCompute>("FCompute<gpu>", NumpyRot90Compute<gpu>);
 
diff --git a/tests/python/unittest/test_numpy_op.py b/tests/python/unittest/test_numpy_op.py
index 3a8325cfb6e2..3fb6fe39e0c5 100644
--- a/tests/python/unittest/test_numpy_op.py
+++ b/tests/python/unittest/test_numpy_op.py
@@ -8478,6 +8478,46 @@ def hybrid_forward(self, F, x, *args, **kwargs):
         assert ret.asnumpy().shape == expected_ret.shape
 
 
+@with_seed()
+@use_np
+def test_np_rollaxis():
+    class TestRollaxis(HybridBlock):
+        def __init__(self, axis=0, start=0):
+            super(TestRollaxis, self).__init__()
+            self._axis = axis
+            self._start = start
+             
+        def hybrid_forward(self, F, a, *args, **kwargs):
+            return F.np.rollaxis(a, axis=self._axis, start=self._start)
+
+    dtypes = ['int32', 'int64', 'float16', 'float32', 'float64']
+    for hybridize in [False, True]:
+        for dtype in dtypes:
+            for ndim in [0, 1, 2, 3, 4, 5, 6]:
+                shape = rand_shape_nd(ndim, dim=5, allow_zero_size=True)
+                np_data = _np.random.uniform(low=-100, high=100, size=shape).astype(dtype)
+                mx_data = np.array(np_data, dtype=dtype)
+                for axis in range(-ndim, ndim):
+                    for start in range(-ndim, ndim + 1):
+                        # test gluon
+                        test_rollaxis = TestRollaxis(axis, start)
+                        if hybridize:
+                            test_rollaxis.hybridize()
+                        np_out = _np.rollaxis(np_data, axis=axis, start=start)
+                        mx_data.attach_grad()
+                        with mx.autograd.record():
+                            mx_out = test_rollaxis(mx_data)
+                        assert mx_out.shape == np_out.shape
+                        mx_out.backward()
+                        assert same(mx_data.grad.shape, mx_data.shape)
+                        assert same(mx_data.grad.asnumpy(), _np.ones(shape))
+                        # test imperative
+                        np_out = _np.rollaxis(np_data, axis=axis, start=start)
+                        mx_out = np.rollaxis(mx_data, axis=axis, start=start)
+                        assert np_out.dtype == mx_out.dtype
+                        assert same(mx_out.asnumpy(), np_out)
+
+
 if __name__ == '__main__':
     import nose
     nose.runmodule()