[Numpy] Add ffi for np.sum, np.std, np.var, np.average and np.histogram (#17866)

* add ffi for sum, var and std
* add ffi wrapper for np.average
* add ffi wrapper for np.histogram
apache · Apr 16, 2020 · 7bef85e · 7bef85e
1 parent 9337137
commit 7bef85e
Show file tree

Hide file tree

Showing 22 changed files with 739 additions and 151 deletions.
diff --git a/benchmark/python/ffi/benchmark_ffi.py b/benchmark/python/ffi/benchmark_ffi.py
@@ -60,6 +60,11 @@ def prepare_workloads():
     OpArgMngr.add_workload("tensordot", pool['2x2'], pool['2x2'], ((1, 0), (0, 1)))
     OpArgMngr.add_workload("kron", pool['2x2'], pool['2x2'])
     OpArgMngr.add_workload("cumsum", pool['3x2'], axis=0, out=pool['3x2'])
+    OpArgMngr.add_workload("sum", pool['2x2'], axis=0, keepdims=True, out=pool['1x2'])
+    OpArgMngr.add_workload("std", pool['2x2'], axis=0, ddof=0, keepdims=True, out=pool['1x2'])
+    OpArgMngr.add_workload("var", pool['2x2'], axis=0, ddof=1, keepdims=True, out=pool['1x2'])
+    OpArgMngr.add_workload("average", pool['2x2'], weights=pool['2'], axis=1, returned=True)
+    OpArgMngr.add_workload("histogram", pool['2x2'], bins=10, range=(0.0, 10.0))
     OpArgMngr.add_workload("add", pool['2x2'], pool['2x2'])
     OpArgMngr.add_workload("linalg.eig", pool['3x3'])
     OpArgMngr.add_workload("linalg.eigh", pool['3x3'])

diff --git a/include/mxnet/runtime/ffi_helper.h b/include/mxnet/runtime/ffi_helper.h
@@ -99,6 +99,24 @@ class Integer: public ObjectRef {
   MXNET_DEFINE_OBJECT_REF_METHODS(Integer, ObjectRef, IntegerObj)
 };
 
+class FloatObj: public Object {
+ public:
+  double value;
+  static constexpr const uint32_t _type_index = TypeIndex::kFloat;
+  static constexpr const char* _type_key = "MXNet.Float";
+  MXNET_DECLARE_FINAL_OBJECT_INFO(FloatObj, Object)
+};
+
+class Float: public ObjectRef {
+ public:
+  explicit Float(double value,
+                 ObjectPtr<FloatObj>&& data = make_object<FloatObj>()) {
+    data->value = value;
+    data_ = std::move(data);
+  }
+  MXNET_DEFINE_OBJECT_REF_METHODS(Float, ObjectRef, FloatObj)
+};
+
 //  Helper functions for fast FFI implementations
 /*!
  * \brief A builder class that helps to incrementally build ADT.

diff --git a/include/mxnet/runtime/object.h b/include/mxnet/runtime/object.h
@@ -58,6 +58,7 @@ enum TypeIndex  {
   kEllipsis = 5,
   kSlice = 6,
   kInteger = 7,
+  kFloat = 8,
   kStaticIndexEnd,
   /*! \brief Type index is allocated during runtime. */
   kDynamic = kStaticIndexEnd

diff --git a/python/mxnet/_ffi/_cython/convert.pxi b/python/mxnet/_ffi/_cython/convert.pxi
@@ -43,6 +43,10 @@ cdef extern from "mxnet/runtime/ffi_helper.h" namespace "mxnet::runtime":
         Integer()
         Integer(int64_t)
 
+    cdef cppclass Float(ObjectRef):
+        Float()
+        Float(double)
+
 
 cdef inline ADT convert_tuple(tuple src_tuple) except *:
     cdef uint32_t size = len(src_tuple)
@@ -71,5 +75,7 @@ cdef inline ObjectRef convert_object(object src_obj) except *:
         return convert_list(src_obj)
     elif isinstance(src_obj, Integral):
         return Integer(<int64_t>src_obj)
+    elif isinstance(src_obj, float):
+        return Float(<double>src_obj)
     else:
         raise TypeError("Don't know how to convert type %s" % type(src_obj))
diff --git a/python/mxnet/_ffi/node_generic.py b/python/mxnet/_ffi/node_generic.py
@@ -52,6 +52,8 @@ def convert_to_node(value):
     """
     if isinstance(value, Integral):
         return _api_internal._Integer(value)
+    elif isinstance(value, float):
+        return _api_internal._Float(value)
     elif isinstance(value, (list, tuple)):
         value = [convert_to_node(x) for x in value]
         return _api_internal._ADT(*value)

diff --git a/python/mxnet/_numpy_op_doc.py b/python/mxnet/_numpy_op_doc.py
@@ -231,98 +231,6 @@ def _np_dot(a, b, out=None):
     pass
 
 
-def _np_sum(a, axis=None, dtype=None, keepdims=False, initial=None, out=None):
-    r"""
-    Sum of array elements over a given axis.
-
-    Parameters
-    ----------
-    a : ndarray
-        Input data.
-    axis : None or int, optional
-        Axis or axes along which a sum is performed.  The default,
-        axis=None, will sum all of the elements of the input array.  If
-        axis is negative it counts from the last to the first axis.
-    dtype : dtype, optional
-        The type of the returned array and of the accumulator in which the
-        elements are summed. The default type is float32.
-    keepdims : bool, optional
-        If this is set to True, the axes which are reduced are left
-        in the result as dimensions with size one. With this option,
-        the result will broadcast correctly against the input array.
-
-        If the default value is passed, then `keepdims` will not be
-        passed through to the `sum` method of sub-classes of
-        `ndarray`, however any non-default value will be.  If the
-        sub-classes `sum` method does not implement `keepdims` any
-        exceptions will be raised.
-    initial: Currently only supports None as input, optional
-        Starting value for the sum.
-        Currently not implemented. Please use ``None`` as input or skip this argument.
-    out : ndarray or None, optional
-        Alternative output array in which to place the result. It must have
-        the same shape and dtype as the expected output.
-
-    Returns
-    -------
-    sum_along_axis : ndarray
-        An ndarray with the same shape as `a`, with the specified
-        axis removed. If an output array is specified, a reference to
-        `out` is returned.
-
-    Notes
-    -----
-    - Input type does not support Python native iterables.
-    - "out" param: cannot perform auto type change. out ndarray's dtype must be the same as the expected output.
-    - "initial" param is not supported yet. Please use None as input.
-    - Arithmetic is modular when using integer types, and no error is raised on overflow.
-    - The sum of an empty array is the neutral element 0:
-
-    >>> a = np.empty(1)
-    >>> np.sum(a)
-    array(0.)
-
-    This function differs from the original `numpy.sum
-    <https://docs.scipy.org/doc/numpy/reference/generated/numpy.sum.html>`_ in
-    the following aspects:
-
-    - Input type does not support Python native iterables(list, tuple, ...).
-    - "out" param: cannot perform auto type cast. out ndarray's dtype must be the same as the expected output.
-    - "initial" param is not supported yet. Please use ``None`` as input or skip it.
-
-    Examples
-    --------
-    >>> a = np.array([0.5, 1.5])
-    >>> np.sum(a)
-    array(2.)
-    >>> a = np.array([0.5, 0.7, 0.2, 1.5])
-    >>> np.sum(a, dtype=np.int32)
-    array(2, dtype=int32)
-    >>> a = np.array([[0, 1], [0, 5]])
-    >>> np.sum(a)
-    array(6.)
-    >>> np.sum(a, axis=0)
-    array([0., 6.])
-    >>> np.sum(a, axis=1)
-    array([1., 5.])
-
-    With output ndarray:
-
-    >>> a = np.array([[0, 1], [0, 5]])
-    >>> b = np.ones((2,), dtype=np.float32)
-    >>> np.sum(a, axis = 0, out=b)
-    array([0., 6.])
-    >>> b
-    array([0., 6.])
-
-    If the accumulator is too small, overflow occurs:
-
-    >>> np.ones(128, dtype=np.int8).sum(dtype=np.int8)
-    array(-128, dtype=int8)
-    """
-    pass
-
-
 def _np_copy(a, out=None):
     """
     Return an array copy of the given object.

diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
@@ -48,7 +48,7 @@
            'true_divide', 'nonzero', 'quantile', 'percentile', 'shares_memory', 'may_share_memory', 'interp',
            'diff', 'ediff1d', 'resize', 'polyval', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite',
            'atleast_1d', 'atleast_2d', 'atleast_3d',
-           'where', 'bincount', 'rollaxis', 'pad', 'cumsum', 'diag', 'diagonal']
+           'where', 'bincount', 'rollaxis', 'pad', 'cumsum', 'sum', 'diag', 'diagonal']
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -1739,13 +1739,13 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
     if isinstance(bins, numeric_types):
         if range is None:
             raise NotImplementedError("automatic range is not supported yet...")
-        return _npi.histogram(a, bin_cnt=bins, range=range)
+        return tuple(_api_internal.histogram(a, None, bins, range))
     if isinstance(bins, (list, tuple)):
         raise NotImplementedError("array_like bins is not supported yet...")
     if isinstance(bins, str):
         raise NotImplementedError("string bins is not supported yet...")
     if isinstance(bins, NDArray):
-        return _npi.histogram(a, bins=bins)
+        return tuple(_api_internal.histogram(a, bins, None, None))
     raise ValueError("np.histogram fails with", locals())
 
 
@@ -4883,10 +4883,7 @@ def average(a, axis=None, weights=None, returned=False, out=None):
     >>> np.average(data, axis=1, weights=weights)
     array([0.75, 2.75, 4.75])
     """
-    if weights is None:
-        return _npi.average(a, axis=axis, weights=None, returned=returned, weighted=False, out=out)
-    else:
-        return _npi.average(a, axis=axis, weights=weights, returned=returned, out=out)
+    return _api_internal.average(a, weights, axis, returned, weights is not None, out)
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -5011,7 +5008,7 @@ def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):  # pylint:
     >>> np.std(a, dtype=np.float64)
     array(0.45, dtype=float64)
     """
-    return _npi.std(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out)
+    return _api_internal.std(a, axis, dtype, ddof, keepdims, out)
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -5081,7 +5078,7 @@ def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):  # pylint:
     >>> ((1-0.55)**2 + (0.1-0.55)**2)/2
     0.2025
     """
-    return _npi.var(a, axis=axis, dtype=dtype, ddof=ddof, keepdims=keepdims, out=out)
+    return _api_internal.var(a, axis, dtype, ddof, keepdims, out)
 
 
 # pylint: disable=redefined-outer-name
@@ -6294,7 +6291,7 @@ def outer(a, b):
         [-2., -1.,  0.,  1.,  2.],
         [-2., -1.,  0.,  1.,  2.]])
     """
-    return tensordot(a.flatten(), b.flatten(), 0)
+    return tensordot(a.reshape_view((-1, )), b.reshape_view((-1, )), 0)
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -8464,3 +8461,100 @@ def diagonal(a, offset=0, axis1=0, axis2=1):
             [1, 7]])
     """
     return _api_internal.diagonal(a, offset, axis1, axis2)
+
+
+# pylint:disable=redefined-outer-name, too-many-arguments
+@set_module('mxnet.ndarray.numpy')
+def sum(a, axis=None, dtype=None, out=None, keepdims=None, initial=None, where=None):
+    r"""
+    Sum of array elements over a given axis.
+
+    Parameters
+    ----------
+    a : ndarray
+        Input data.
+    axis : None or int, optional
+        Axis or axes along which a sum is performed.  The default,
+        axis=None, will sum all of the elements of the input array.  If
+        axis is negative it counts from the last to the first axis.
+    dtype : dtype, optional
+        The type of the returned array and of the accumulator in which the
+        elements are summed. The default type is float32.
+    keepdims : bool, optional
+        If this is set to True, the axes which are reduced are left
+        in the result as dimensions with size one. With this option,
+        the result will broadcast correctly against the input array.
+
+        If the default value is passed, then `keepdims` will not be
+        passed through to the `sum` method of sub-classes of
+        `ndarray`, however any non-default value will be.  If the
+        sub-classes `sum` method does not implement `keepdims` any
+        exceptions will be raised.
+    initial: Currently only supports None as input, optional
+        Starting value for the sum.
+        Currently not implemented. Please use ``None`` as input or skip this argument.
+    out : ndarray or None, optional
+        Alternative output array in which to place the result. It must have
+        the same shape and dtype as the expected output.
+
+    Returns
+    -------
+    sum_along_axis : ndarray
+        An ndarray with the same shape as `a`, with the specified
+        axis removed. If an output array is specified, a reference to
+        `out` is returned.
+
+    Notes
+    -----
+    - Input type does not support Python native iterables.
+    - "out" param: cannot perform auto type change. out ndarray's dtype must be the same as the expected output.
+    - "initial" param is not supported yet. Please use None as input.
+    - Arithmetic is modular when using integer types, and no error is raised on overflow.
+    - The sum of an empty array is the neutral element 0:
+
+    >>> a = np.empty(1)
+    >>> np.sum(a)
+    array(0.)
+
+    This function differs from the original `numpy.sum
+    <https://docs.scipy.org/doc/numpy/reference/generated/numpy.sum.html>`_ in
+    the following aspects:
+
+    - Input type does not support Python native iterables(list, tuple, ...).
+    - "out" param: cannot perform auto type cast. out ndarray's dtype must be the same as the expected output.
+    - "initial" param is not supported yet. Please use ``None`` as input or skip it.
+
+    Examples
+    --------
+    >>> a = np.array([0.5, 1.5])
+    >>> np.sum(a)
+    array(2.)
+    >>> a = np.array([0.5, 0.7, 0.2, 1.5])
+    >>> np.sum(a, dtype=np.int32)
+    array(2, dtype=int32)
+    >>> a = np.array([[0, 1], [0, 5]])
+    >>> np.sum(a)
+    array(6.)
+    >>> np.sum(a, axis=0)
+    array([0., 6.])
+    >>> np.sum(a, axis=1)
+    array([1., 5.])
+
+    With output ndarray:
+
+    >>> a = np.array([[0, 1], [0, 5]])
+    >>> b = np.ones((2,), dtype=np.float32)
+    >>> np.sum(a, axis=0, out=b)
+    array([0., 6.])
+    >>> b
+    array([0., 6.])
+
+    If the accumulator is too small, overflow occurs:
+
+    >>> np.ones(128, dtype=np.int8).sum(dtype=np.int8)
+    array(-128, dtype=int8)
+    """
+    if where is not None and where is not True:
+        raise ValueError("only where=None or where=True cases are supported for now")
+    return _api_internal.sum(a, axis, dtype, keepdims, initial, out)
+# pylint:enable=redefined-outer-name, too-many-arguments