Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add coarsen.construct #5476

Merged
merged 16 commits into from
Jun 24, 2021
Merged
2 changes: 2 additions & 0 deletions doc/api-hidden.rst
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@

core.rolling.DatasetCoarsen.all
core.rolling.DatasetCoarsen.any
core.rolling.DatasetCoarsen.construct
core.rolling.DatasetCoarsen.count
core.rolling.DatasetCoarsen.max
core.rolling.DatasetCoarsen.mean
Expand Down Expand Up @@ -185,6 +186,7 @@

core.rolling.DataArrayCoarsen.all
core.rolling.DataArrayCoarsen.any
core.rolling.DataArrayCoarsen.construct
core.rolling.DataArrayCoarsen.count
core.rolling.DataArrayCoarsen.max
core.rolling.DataArrayCoarsen.mean
Expand Down
2 changes: 1 addition & 1 deletion doc/howdoi.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ How do I ...
* - change the order of dimensions
- :py:meth:`DataArray.transpose`, :py:meth:`Dataset.transpose`
* - reshape dimensions
- :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack`
- :py:meth:`DataArray.stack`, :py:meth:`Dataset.stack`, :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct`
* - remove a variable from my object
- :py:meth:`Dataset.drop_vars`, :py:meth:`DataArray.drop_vars`
* - remove dimensions of length 1 or 0
Expand Down
6 changes: 4 additions & 2 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ v0.18.3 (unreleased)
New Features
~~~~~~~~~~~~

- Added :py:meth:`Dataset.coarsen.construct`, :py:meth:`DataArray.coarsen.construct` (:issue:`5454`, :pull:`5476`).
By `Deepak Cherian <https://github.com/dcherian>`_.
- Xarray now uses consolidated metadata by default when writing and reading Zarr
stores (:issue:`5251`).
By `Stephan Hoyer <https://github.com/shoyer>`_.
Expand Down Expand Up @@ -724,7 +726,7 @@ Documentation
By `Pieter Gijsbers <https://github.com/pgijsbers>`_.
- Fix grammar and typos in the :doc:`contributing` guide (:pull:`4545`).
By `Sahid Velji <https://github.com/sahidvelji>`_.
- Fix grammar and typos in the :doc:`io` guide (:pull:`4553`).
- Fix grammar and typos in the :doc:`user-guide/io` guide (:pull:`4553`).
By `Sahid Velji <https://github.com/sahidvelji>`_.
- Update link to NumPy docstring standard in the :doc:`contributing` guide (:pull:`4558`).
By `Sahid Velji <https://github.com/sahidvelji>`_.
Expand Down Expand Up @@ -3033,7 +3035,7 @@ Documentation
- Added apply_ufunc example to :ref:`/examples/weather-data.ipynb#Toy-weather-data` (:issue:`1844`).
By `Liam Brannigan <https://github.com/braaannigan>`_.
- New entry `Why don’t aggregations return Python scalars?` in the
:doc:`faq` (:issue:`1726`).
:doc:`getting-started-guide/faq` (:issue:`1726`).
By `0x0L <https://github.com/0x0L>`_.

Enhancements
Expand Down
105 changes: 105 additions & 0 deletions xarray/core/rolling.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import functools
import itertools
import warnings
from typing import Any, Callable, Dict

Expand All @@ -8,6 +9,7 @@
from .arithmetic import CoarsenArithmetic
from .options import _get_keep_attrs
from .pycompat import is_duck_dask_array
from .utils import either_dict_or_kwargs

try:
import bottleneck
Expand Down Expand Up @@ -845,6 +847,109 @@ def __repr__(self):
klass=self.__class__.__name__, attrs=",".join(attrs)
)

def construct(
    self,
    window_dim=None,
    keep_attrs=None,
    **window_dim_kwargs,
):
    """
    Convert this Coarsen object to a DataArray or Dataset,
    where each coarsening dimension is split (reshaped) into two
    new dimensions.

    Parameters
    ----------
    window_dim : mapping
        A mapping from existing dimension name to a pair of new
        dimension names. The size of the second dimension will be the
        length of the coarsening window.
    keep_attrs : bool, optional
        Preserve attributes if True. If None, the value is resolved with
        ``_get_keep_attrs(default=True)``.
    **window_dim_kwargs : {dim: new_name, ...}
        The keyword arguments form of ``window_dim``.

    Returns
    -------
    Dataset or DataArray with reshaped dimensions

    Raises
    ------
    ValueError
        If no dimension mapping is given, if an entry does not provide
        exactly two new dimension names, if ``window_dim`` names a
        dimension that is not being coarsened, or if a coarsened
        dimension has no entry in ``window_dim``.

    Examples
    --------
    >>> da = xr.DataArray(np.arange(24), dims="time")
    >>> da.coarsen(time=12).construct(time=("year", "month"))
    <xarray.DataArray (year: 2, month: 12)>
    array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11],
           [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]])
    Dimensions without coordinates: year, month

    See Also
    --------
    DataArrayRolling.construct
    DatasetRolling.construct
    """

    # Imported here rather than at module level (as in the original block).
    from .dataarray import DataArray
    from .dataset import Dataset

    window_dim = either_dict_or_kwargs(
        window_dim, window_dim_kwargs, "Coarsen.construct"
    )
    if not window_dim:
        raise ValueError(
            "Either window_dim or window_dim_kwargs need to be specified."
        )

    # A bare string is rejected even when it happens to have length 2;
    # testing for str first also avoids calling len() on it needlessly.
    bad_new_dims = tuple(
        win
        for win, dims in window_dim.items()
        if isinstance(dims, str) or len(dims) != 2
    )
    if bad_new_dims:
        raise ValueError(
            f"Please provide exactly two dimension names for the following coarsening dimensions: {bad_new_dims}"
        )

    if keep_attrs is None:
        keep_attrs = _get_keep_attrs(default=True)

    # Names supplied by the caller that this object does not coarsen.
    not_coarsened = set(window_dim) - set(self.windows)
    if not_coarsened:
        raise ValueError(
            f"'window_dim' includes dimensions that will not be coarsened: {not_coarsened}"
        )
    # Coarsened dimensions for which the caller supplied no new names.
    missing_dims = set(self.windows) - set(window_dim)
    if missing_dims:
        raise ValueError(
            f"'window_dim' must contain entries for all dimensions to coarsen. Missing {missing_dims}"
        )

    # Work on a Dataset in both cases; a DataArray is converted back at the end.
    if isinstance(self.obj, DataArray):
        obj = self.obj._to_temp_dataset()
    else:
        obj = self.obj

    reshaped = Dataset()
    reshaped.attrs = obj.attrs if keep_attrs else {}

    for key, var in obj.variables.items():
        # Replace each coarsened dimension by its pair of new names and
        # leave every other dimension untouched.
        reshaped_dims = tuple(
            itertools.chain.from_iterable(
                window_dim.get(dim, [dim]) for dim in var.dims
            )
        )
        if reshaped_dims != var.dims:
            windows = {w: self.windows[w] for w in window_dim if w in var.dims}
            reshaped_var, _ = var.coarsen_reshape(windows, self.boundary, self.side)
            attrs = var.attrs if keep_attrs else {}
            reshaped[key] = (reshaped_dims, reshaped_var, attrs)
        else:
            # NOTE: variables that are not reshaped are passed through as-is,
            # including their attrs, regardless of ``keep_attrs``.
            reshaped[key] = var

    # Every coordinate of the input must remain a coordinate of the result,
    # not only the ones whose dimension was reshaped; assigning into
    # ``reshaped`` above may have stored them as data variables.
    should_be_coords = set(self.obj.coords)
    result = reshaped.set_coords(should_be_coords)
    if isinstance(self.obj, DataArray):
        return self.obj._from_temp_dataset(result)
    return result


class DataArrayCoarsen(Coarsen):
__slots__ = ()
Expand Down
12 changes: 7 additions & 5 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -2158,7 +2158,7 @@ def coarsen(
if not windows:
return self._replace(attrs=_attrs)

reshaped, axes = self._coarsen_reshape(windows, boundary, side)
reshaped, axes = self.coarsen_reshape(windows, boundary, side)
if isinstance(func, str):
name = func
func = getattr(duck_array_ops, name, None)
Expand All @@ -2167,7 +2167,7 @@ def coarsen(

return self._replace(data=func(reshaped, axis=axes, **kwargs), attrs=_attrs)

def _coarsen_reshape(self, windows, boundary, side):
def coarsen_reshape(self, windows, boundary, side):
"""
Construct a reshaped-array for coarsen
"""
Expand All @@ -2183,7 +2183,9 @@ def _coarsen_reshape(self, windows, boundary, side):

for d, window in windows.items():
if window <= 0:
raise ValueError(f"window must be > 0. Given {window}")
raise ValueError(
f"window must be > 0. Given {window} for dimension {d}"
)

variable = self
for d, window in windows.items():
Expand All @@ -2193,8 +2195,8 @@ def _coarsen_reshape(self, windows, boundary, side):
if boundary[d] == "exact":
if n * window != size:
raise ValueError(
"Could not coarsen a dimension of size {} with "
"window {}".format(size, window)
f"Could not coarsen a dimension of size {size} with "
f"window {window} and boundary='exact'. Try a different 'boundary' option."
)
elif boundary[d] == "trim":
if side[d] == "left":
Expand Down
79 changes: 78 additions & 1 deletion xarray/tests/test_coarsen.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@
import xarray as xr
from xarray import DataArray, Dataset, set_options

from . import assert_allclose, assert_equal, has_dask, requires_cftime
from . import (
assert_allclose,
assert_equal,
assert_identical,
has_dask,
raise_if_dask_computes,
requires_cftime,
)
from .test_dataarray import da
from .test_dataset import ds

Expand Down Expand Up @@ -299,3 +306,73 @@ def test_coarsen_da_reduce(da, window, name):
actual = coarsen_obj.reduce(getattr(np, f"nan{name}"))
expected = getattr(coarsen_obj, name)()
assert_allclose(actual, expected)


@pytest.mark.parametrize("dask", [True, False])
def test_coarsen_construct(dask):
    """Check ``coarsen(...).construct(...)`` on a Dataset and a DataArray.

    Covers the mapping and keyword call forms, ``keep_attrs=False``, the
    DataArray path, and the argument combinations that must raise
    ``ValueError``. No dask computation may be triggered by ``construct``.
    """
    source = Dataset(
        {
            "vart": ("time", np.arange(48), {"a": "b"}),
            "varx": ("x", np.arange(10), {"a": "b"}),
            "vartx": (("x", "time"), np.arange(480).reshape(10, 48), {"a": "b"}),
            "vary": ("y", np.arange(12)),
        },
        coords={"time": np.arange(48), "y": np.arange(12)},
        attrs={"foo": "bar"},
    )

    if dask and has_dask:
        source = source.chunk({"x": 4, "time": 10})

    # Mapping from each coarsened dimension to its two replacement names.
    new_dims = {"time": ("year", "month"), "x": ("x", "x_reshaped")}

    # Build the expected result by reshaping the underlying arrays by hand.
    expected = xr.Dataset(attrs={"foo": "bar"})
    expected["vart"] = (
        ("year", "month"),
        source.vart.data.reshape((-1, 12)),
        {"a": "b"},
    )
    expected["varx"] = (
        ("x", "x_reshaped"),
        source.varx.data.reshape((-1, 5)),
        {"a": "b"},
    )
    expected["vartx"] = (
        ("x", "x_reshaped", "year", "month"),
        source.vartx.data.reshape(2, 5, 4, 12),
        {"a": "b"},
    )
    expected["vary"] = source.vary
    expected.coords["time"] = (
        ("year", "month"),
        source.time.data.reshape((-1, 12)),
    )

    # Mapping form.
    with raise_if_dask_computes():
        result = source.coarsen(time=12, x=5).construct(new_dims)
    assert_identical(result, expected)

    # Keyword form.
    with raise_if_dask_computes():
        result = source.coarsen(time=12, x=5).construct(**new_dims)
    assert_identical(result, expected)

    # Attributes are dropped from the dataset and its data variables.
    with raise_if_dask_computes():
        result = source.coarsen(time=12, x=5).construct(new_dims, keep_attrs=False)
    for name in result:
        assert result[name].attrs == {}
    assert result.attrs == {}

    # DataArray path.
    with raise_if_dask_computes():
        result = source.vartx.coarsen(time=12, x=5).construct(new_dims)
    assert_identical(result, expected["vartx"])

    # Each of these argument combinations must be rejected.
    invalid_calls = (
        lambda: source.coarsen(time=12).construct(foo="bar"),
        lambda: source.coarsen(time=12, x=2).construct(time=("year", "month")),
        lambda: source.coarsen(time=12).construct(),
        lambda: source.coarsen(time=12).construct(time="bar"),
        lambda: source.coarsen(time=12).construct(time=("bar",)),
    )
    for call in invalid_calls:
        with pytest.raises(ValueError):
            call()