Added Coarsen #2612

Merged: 30 commits, Jan 6, 2019
Changes shown from 21 commits.

Commits (30)
3525b9c
Added variable.coarsen
fujiisoup Dec 15, 2018
5ff3102
Added DataArray.coarsen and Dataset.coarsen
fujiisoup Dec 16, 2018
6f3cf0c
pep8
fujiisoup Dec 16, 2018
f1f4804
a bugfix for mpa3
fujiisoup Dec 17, 2018
ab5d2f6
Support mean for datatime dtype
fujiisoup Dec 17, 2018
9123fd4
nanmean for DateTime
fujiisoup Dec 17, 2018
c85d18a
API updatedd via comments
fujiisoup Dec 20, 2018
0aa7a37
bug fix in tests
fujiisoup Dec 20, 2018
b656d62
updated docs
fujiisoup Dec 20, 2018
2ffcb23
Merge branch 'master' into corsen
fujiisoup Dec 20, 2018
04773eb
use pd.isnull rather than isnat
fujiisoup Dec 21, 2018
b33020b
support Variable in datetime_to_numeric
fujiisoup Dec 21, 2018
b13af18
use pd.isnull instead of numpy.isnat in test
fujiisoup Dec 21, 2018
24f3061
Merge branch 'master' into corsen
fujiisoup Dec 24, 2018
d806c96
Added an example to doc.
fujiisoup Dec 24, 2018
96bf29b
coordinate_func -> coord_func. Support 0d-array mean with datetime
fujiisoup Dec 24, 2018
b70996a
Added an two dimensional example
fujiisoup Dec 24, 2018
827794e
flake8
fujiisoup Dec 24, 2018
a354005
Merge branch 'master' into corsen
fujiisoup Dec 25, 2018
82c08af
flake8
fujiisoup Dec 25, 2018
d73d1d5
a potential bug fix
fujiisoup Dec 25, 2018
a92c431
Update via comments
fujiisoup Dec 30, 2018
0e53c7b
Always use datetime64[ns] in mean
fujiisoup Dec 30, 2018
07b8060
Added tests for 2d coarsen with value check
fujiisoup Dec 31, 2018
aa41f39
update via comment
fujiisoup Jan 3, 2019
4c347af
Merge branch 'master' into corsen
fujiisoup Jan 3, 2019
2a06b05
whats new
fujiisoup Jan 3, 2019
50fa6aa
Merge branch 'master' into corsen
fujiisoup Jan 3, 2019
1d04bdd
typo fix
fujiisoup Jan 4, 2019
1523292
Merge branch 'master' into corsen
shoyer Jan 6, 2019
2 changes: 2 additions & 0 deletions doc/api.rst
@@ -147,6 +147,7 @@ Computation
Dataset.groupby
Dataset.groupby_bins
Dataset.rolling
Dataset.coarsen
Dataset.resample
Dataset.diff
Dataset.quantile
@@ -312,6 +313,7 @@ Computation
DataArray.groupby
DataArray.groupby_bins
DataArray.rolling
DataArray.coarsen
DataArray.dt
DataArray.resample
DataArray.get_axis_num
39 changes: 39 additions & 0 deletions doc/computation.rst
@@ -199,6 +199,45 @@ You can also use ``construct`` to compute a weighted rolling sum:
To avoid this, use ``skipna=False`` as in the above example.


Coarsen large arrays
====================

``DataArray`` and ``Dataset`` objects include
:py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen`
methods, which support block aggregation along multiple dimensions,

.. ipython:: python

x = np.linspace(0, 10, 300)
t = pd.date_range('15/12/1999', periods=364)
da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]),
dims=['time', 'x'], coords={'time': t, 'x': x})
da

To take a block mean over every 7 days along the ``time`` dimension and
every 2 points along the ``x`` dimension,

.. ipython:: python

da.coarsen(time=7, x=2).mean()

:py:meth:`~xarray.DataArray.coarsen` raises a ``ValueError`` if the data
length is not a multiple of the corresponding window size.
Use ``boundary='trim'`` to drop the excess entries, or ``boundary='pad'``
to pad the insufficient entries with ``nan``,

.. ipython:: python

da.coarsen(time=30, x=2, boundary='trim').mean()

To apply a specific function to a coordinate, pass the function or
function name to the ``coordinate_func`` option,

.. ipython:: python

da.coarsen(time=7, x=2, coordinate_func={'time': 'min'}).mean()
Review comment (Member): This should be coord_func, not coordinate_func.



Computation using Coordinates
=============================

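As a rough sketch of what the block aggregation in the documentation above computes, the same result can be obtained in plain NumPy by splitting each axis into (n_blocks, window) pairs and averaging over the window axes. The `block_mean` helper and the array shapes here are illustrative stand-ins, not xarray API:

```python
import numpy as np

def block_mean(a, windows):
    # Mean over non-overlapping blocks; each axis length must be an exact
    # multiple of its window, mirroring coarsen's boundary='exact' behaviour.
    new_shape = []
    for size, win in zip(a.shape, windows):
        if size % win != 0:
            raise ValueError("dimension size is not a multiple of window size")
        new_shape.extend([size // win, win])
    reshaped = a.reshape(new_shape)
    # after reshaping, the window axes sit at the odd positions (1, 3, ...)
    return reshaped.mean(axis=tuple(range(1, reshaped.ndim, 2)))

data = np.arange(56.0).reshape(14, 4)   # 14 "time" steps by 4 "x" points
coarse = block_mean(data, (7, 2))       # shape (2, 2)
```

Changing a window so it no longer divides the axis length evenly raises the same kind of ``ValueError`` the docs describe for ``boundary='exact'``.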
4 changes: 4 additions & 0 deletions doc/whats-new.rst
@@ -50,6 +50,10 @@ Enhancements
- :py:class:`CFTimeIndex` uses slicing for string indexing when possible (like
:py:class:`pandas.DatetimeIndex`), which avoids unnecessary copies.
By `Stephan Hoyer <https://github.com/shoyer>`_
- :py:meth:`~xarray.DataArray.coarsen` and
:py:meth:`~xarray.Dataset.coarsen` are newly added.
(:issue:`2525`)
By `Keisuke Fujii <https://github.com/fujiisoup>`_.

Review comment (Member): This now needs to move up to the section for 0.11.2. Also it would be nice to add a link to the new doc section "Coarsen large arrays".
- Enable passing ``rasterio.io.DatasetReader`` or ``rasterio.vrt.WarpedVRT`` to
``open_rasterio`` instead of file path string. Allows for in-memory
reprojection, see (:issue:`2588`).
59 changes: 59 additions & 0 deletions xarray/core/common.py
@@ -590,6 +590,65 @@ def rolling(self, dim=None, min_periods=None, center=False, **dim_kwargs):
return self._rolling_cls(self, dim, min_periods=min_periods,
center=center)

def coarsen(self, dim=None, boundary='exact', side='left',
coord_func='mean', **dim_kwargs):
"""
Coarsen object.

Parameters
----------
dim : dict, optional
Mapping from the dimension name to the window size
(e.g. ``{'time': 7}``).
boundary : 'exact' | 'trim' | 'pad'
If 'exact', a ValueError will be raised if dimension size is not a
multiple of the window size. If 'trim', the excess entries are
dropped. If 'pad', NA will be padded.
side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
coord_func : function (name) that is applied to the coordinates,
or a mapping from coordinate name to function (name).

Returns
-------
Coarsen object (core.rolling.DataArrayCoarsen for DataArray,
core.rolling.DatasetCoarsen for Dataset.)

Examples
--------
Coarsen the long time series by averaging over every four days.

>>> da = xr.DataArray(np.linspace(0, 364, num=364),
... dims='time',
... coords={'time': pd.date_range(
... '15/12/1999', periods=364)})
>>> da
>>> <xarray.DataArray (time: 364)>
>>> array([ 0. , 1.002755, 2.00551 , ..., 362.997245,
364. ])
>>> Coordinates:
>>> * time (time) datetime64[ns] 1999-12-15 ... 2000-12-12
>>>
Review comment (Member): nit: the results here should not be prefaced with >>>.

>>> da.coarsen(time=4).mean()
>>> <xarray.DataArray (time: 91)>
>>> array([ 1.504132, 5.515152, 9.526171, 13.53719 , ...,
>>> 362.495868])
>>> Coordinates:
>>> * time (time) datetime64[ns] 1999-12-16T12:00:00 ...

See Also
--------
core.rolling.DataArrayCoarsen
core.rolling.DatasetCoarsen
"""
dim = either_dict_or_kwargs(dim, dim_kwargs, 'coarsen')
return self._coarsen_cls(
self, dim, boundary=boundary, side=side,
coord_func=coord_func)

def resample(self, indexer=None, skipna=None, closed=None, label=None,
base=0, keep_attrs=None, loffset=None, **indexer_kwargs):
"""Returns a Resample object for performing resampling operations.
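The ``coarsen`` method above forwards its arguments through ``either_dict_or_kwargs``. A hypothetical re-implementation of that helper (the exact signature and error message in xarray may differ) shows how ``da.coarsen({'time': 7})`` and ``da.coarsen(time=7)`` become equivalent:

```python
def either_dict_or_kwargs(pos_kwargs, kw_kwargs, func_name):
    # Accept the dimension mapping either positionally or as keyword
    # arguments, but not both at once.
    if pos_kwargs is None:
        return kw_kwargs
    if kw_kwargs:
        raise ValueError("cannot specify both keyword and positional "
                         "arguments to .%s" % func_name)
    return pos_kwargs

# both call styles reduce to the same mapping
a = either_dict_or_kwargs({'time': 7, 'x': 2}, {}, 'coarsen')
b = either_dict_or_kwargs(None, {'time': 7, 'x': 2}, 'coarsen')
```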
1 change: 1 addition & 0 deletions xarray/core/dataarray.py
@@ -159,6 +159,7 @@ class DataArray(AbstractArray, DataWithCoords):
"""
_groupby_cls = groupby.DataArrayGroupBy
_rolling_cls = rolling.DataArrayRolling
_coarsen_cls = rolling.DataArrayCoarsen
_resample_cls = resample.DataArrayResample

dt = property(DatetimeAccessor)
1 change: 1 addition & 0 deletions xarray/core/dataset.py
@@ -316,6 +316,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords,
"""
_groupby_cls = groupby.DatasetGroupBy
_rolling_cls = rolling.DatasetRolling
_coarsen_cls = rolling.DatasetCoarsen
_resample_cls = resample.DatasetResample

def __init__(self, data_vars=None, coords=None, attrs=None,
23 changes: 20 additions & 3 deletions xarray/core/duck_array_ops.py
@@ -13,7 +13,7 @@
import numpy as np
import pandas as pd

from . import dask_array_ops, dtypes, npcompat, nputils
from . import dask_array_ops, dtypes, npcompat, nputils, utils
from .nputils import nanfirst, nanlast
from .pycompat import dask_array_type

@@ -261,8 +261,6 @@ def f(values, axis=None, skipna=None, **kwargs):
sum = _create_nan_agg_method('sum')
sum.numeric_only = True
sum.available_min_count = True
mean = _create_nan_agg_method('mean')
mean.numeric_only = True
std = _create_nan_agg_method('std')
std.numeric_only = True
var = _create_nan_agg_method('var')
@@ -278,6 +276,25 @@ def f(values, axis=None, skipna=None, **kwargs):
cumsum_1d.numeric_only = True


_mean = _create_nan_agg_method('mean')


def mean(array, axis=None, skipna=None, **kwargs):
Review comment (fujiisoup, Member, Author, Dec 25, 2018): I would like to make this compatible with the CFTime index. @spencerkclark, could you comment on this?

Review comment (spencerkclark, Member): Thanks @fujiisoup! I think something like the following would work:

from ..coding.times import format_cftime_datetime
from .common import contains_cftime_datetimes


def mean(array, axis=None, skipna=None, **kwargs):
    array = asarray(array)
    if array.dtype.kind == 'M':
        offset = min(array)
        # infer the compatible timedelta dtype
        dtype = (np.empty((1,), dtype=array.dtype) - offset).dtype
        return _mean(utils.datetime_to_numeric(array, offset), axis=axis,
                     skipna=skipna, **kwargs).astype(dtype) + offset
    elif contains_cftime_datetimes(xr.DataArray(array)):
        import cftime
        offset = min(array)
        numeric_dates = utils.datetime_to_numeric(xr.DataArray(array), offset,
                                                  datetime_unit='s').data
        mean_dates = _mean(numeric_dates, axis=axis, skipna=skipna, **kwargs)
        units = 'seconds since {}'.format(format_cftime_datetime(offset))
        calendar = offset.calendar
        return cftime.num2date(mean_dates, units=units, calendar=calendar,
                               only_use_cftime_datetimes=True)
    else:
        return _mean(array, axis=axis, skipna=skipna, **kwargs)

Ideally we would modify datetime_to_numeric and contains_cftime_datetimes to work with both pure NumPy or dask arrays of cftime objects as well as DataArrays (currently they only work with DataArrays), but that's the way things are coded right now. I could handle that in a follow-up if you would like.

Review comment (fujiisoup, Member, Author): Thanks, @spencerkclark. It would be nice if you could send a follow-up PR :)

Review comment (spencerkclark, Member): Sure thing, I'd be happy to take care of making this compatible with cftime dates.
""" inhouse mean that can handle datatime dtype """
array = asarray(array)
if array.dtype.kind == 'M':
offset = min(array)
# infer the compatible timedelta dtype
dtype = (np.empty((1,), dtype=array.dtype) - offset).dtype
Review comment (fujiisoup, Member, Author): This is just to find the corresponding timedelta from datetime. Is there any good function to find an appropriate dtype?

Review comment (spencerkclark, Member): I could be missing something, but since xarray always coerces all NumPy dates to datetime64[ns], will the default results of datetime_to_numeric always have units of nanoseconds? In other words, will this dtype always be timedelta64[ns]?

Review comment (fujiisoup, Member, Author): Thanks. I just realized we are always using [ns] for datetime. Updated.

return _mean(utils.datetime_to_numeric(array, offset), axis=axis,
skipna=skipna, **kwargs).astype(dtype) + offset
else:
return _mean(array, axis=axis, skipna=skipna, **kwargs)


mean.numeric_only = True


def _nd_cum_func(cum_func, array, axis, **kwargs):
array = asarray(array)
if axis is None:
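A self-contained sketch of the offset trick the new ``mean`` uses for ``datetime64`` input: subtract the minimum, average the resulting timedeltas as plain floats, then add the offset back. This is a plain-NumPy stand-in, not the xarray helper itself:

```python
import numpy as np

dates = np.array(['2000-01-01', '2000-01-03', '2000-01-05'],
                 dtype='datetime64[ns]')
offset = dates.min()
# datetime64 - datetime64 yields timedelta64[ns]; view it as float so the
# ordinary floating-point mean applies
numeric = (dates - offset).astype(np.float64)
mean_date = offset + numeric.mean().astype('timedelta64[ns]')
```

Because xarray coerces all NumPy dates to ``datetime64[ns]`` (as noted in the review thread above), the round trip through ``timedelta64[ns]`` is lossless here.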
4 changes: 2 additions & 2 deletions xarray/core/missing.py
@@ -7,7 +7,7 @@
import numpy as np
import pandas as pd

from . import rolling
from . import utils
from .common import _contains_datetime_like_objects
from .computation import apply_ufunc
from .duck_array_ops import dask_array_type
@@ -370,7 +370,7 @@ def _get_valid_fill_mask(arr, dim, limit):
None'''
kw = {dim: limit + 1}
# we explicitly use construct method to avoid copy.
new_dim = rolling._get_new_dimname(arr.dims, '_window')
new_dim = utils.get_temp_dimname(arr.dims, '_window')
return (arr.isnull().rolling(min_periods=1, **kw)
.construct(new_dim, fill_value=False)
.sum(new_dim, skipna=False)) <= limit
26 changes: 26 additions & 0 deletions xarray/core/ops.py
@@ -122,6 +122,20 @@
New {da_or_ds} object with `{name}` applied along its rolling dimension.
"""

_COARSEN_REDUCE_DOCSTRING_TEMPLATE = """\
Coarsen this object by applying `{name}` along its dimensions.

Parameters
----------
**kwargs : dict
Additional keyword arguments passed on to `{name}`.

Returns
-------
reduced : DataArray or Dataset
New object with `{name}` applied along its coarsened dimensions.
"""


def fillna(data, other, join="left", dataset_join="left"):
"""Fill missing values in this object with data from the other object.
@@ -378,3 +392,15 @@ def inject_datasetrolling_methods(cls):
func.__doc__ = _ROLLING_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__, da_or_ds='Dataset')
setattr(cls, 'count', func)


def inject_coarsen_methods(cls):
# standard numpy reduce methods
methods = [(name, getattr(duck_array_ops, name))
for name in NAN_REDUCE_METHODS]
for name, f in methods:
func = cls._reduce_method(f)
func.__name__ = name
func.__doc__ = _COARSEN_REDUCE_DOCSTRING_TEMPLATE.format(
name=func.__name__)
setattr(cls, name, func)
1 change: 0 additions & 1 deletion xarray/core/pdcompat.py
@@ -39,7 +39,6 @@


import numpy as np
import pandas as pd


# for pandas 0.19