-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added Coarsen #2612
Added Coarsen #2612
Changes from 4 commits
3525b9c
5ff3102
6f3cf0c
f1f4804
ab5d2f6
9123fd4
c85d18a
0aa7a37
b656d62
2ffcb23
04773eb
b33020b
b13af18
24f3061
d806c96
96bf29b
b70996a
827794e
a354005
82c08af
d73d1d5
a92c431
0e53c7b
07b8060
aa41f39
4c347af
2a06b05
50fa6aa
1d04bdd
1523292
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,32 +5,14 @@ | |
|
||
import numpy as np | ||
|
||
from . import dtypes | ||
from . import dtypes, utils | ||
from .dask_array_ops import dask_rolling_wrapper | ||
from .ops import ( | ||
bn, has_bottleneck, inject_bottleneck_rolling_methods, | ||
inject_datasetrolling_methods) | ||
bn, has_bottleneck, inject_coarsen_methods, | ||
inject_bottleneck_rolling_methods, inject_datasetrolling_methods) | ||
from .pycompat import OrderedDict, dask_array_type, zip | ||
|
||
|
||
def _get_new_dimname(dims, new_dim): | ||
""" Get an new dimension name based on new_dim, that is not used in dims. | ||
If the same name exists, we add an underscore(s) in the head. | ||
|
||
Example1: | ||
dims: ['a', 'b', 'c'] | ||
new_dim: ['_rolling'] | ||
-> ['_rolling'] | ||
Example2: | ||
dims: ['a', 'b', 'c', '_rolling'] | ||
new_dim: ['_rolling'] | ||
-> ['__rolling'] | ||
""" | ||
while new_dim in dims: | ||
new_dim = '_' + new_dim | ||
return new_dim | ||
|
||
|
||
class Rolling(object): | ||
"""A object that implements the moving window pattern. | ||
|
||
|
@@ -231,7 +213,7 @@ def reduce(self, func, **kwargs): | |
reduced : DataArray | ||
Array with summarized data. | ||
""" | ||
rolling_dim = _get_new_dimname(self.obj.dims, '_rolling_dim') | ||
rolling_dim = utils.get_temp_dimname(self.obj.dims, '_rolling_dim') | ||
windows = self.construct(rolling_dim) | ||
result = windows.reduce(func, dim=rolling_dim, **kwargs) | ||
|
||
|
@@ -242,7 +224,7 @@ def reduce(self, func, **kwargs): | |
def _counts(self): | ||
""" Number of non-nan entries in each rolling window. """ | ||
|
||
rolling_dim = _get_new_dimname(self.obj.dims, '_rolling_dim') | ||
rolling_dim = utils.get_temp_dimname(self.obj.dims, '_rolling_dim') | ||
# We use False as the fill_value instead of np.nan, since boolean | ||
# array is faster to be reduced than object array. | ||
# The use of skipna==False is also faster since it does not need to | ||
|
@@ -454,5 +436,121 @@ def construct(self, window_dim, stride=1, fill_value=dtypes.NA): | |
**{self.dim: slice(None, None, stride)}) | ||
|
||
|
||
class Coarsen(object):
    """An object that implements coarsening.

    It stores the target object together with the coarsening options; the
    reduction methods themselves are provided by subclasses.

    See Also
    --------
    Dataset.coarsen
    DataArray.coarsen
    """

    # Names of the attributes shown by ``__repr__``.
    _attributes = ['windows', 'side', 'trim_excess']

    def __init__(self, obj, windows, side, trim_excess, coordinate_func):
        """
        Coarsen object.

        Parameters
        ----------
        obj : Dataset or DataArray
            Object to coarsen.
        windows : mapping from dimension name to window size
            Dimensions to coarsen along (e.g., `time`) and the size of the
            window used for each of them.
        side : 'left' or 'right' or mapping from dimension to 'left' or 'right'
            Stored as given and forwarded to ``Variable.coarsen``.
        trim_excess : boolean, or dict of boolean
            Stored as given and forwarded to ``Variable.coarsen``.
        coordinate_func : mapping from coordinate name to func, or None
            Function used to coarsen each coordinate. Coordinates of ``obj``
            that are missing from the mapping (or all of them, if None)
            use ``np.mean``.

        Returns
        -------
        coarsen
        """
        self.obj = obj
        self.windows = windows
        self.side = side
        self.trim_excess = trim_excess

        # Work on a copy so that filling in the defaults below never
        # mutates the mapping owned by the caller.
        if coordinate_func is None:
            coordinate_func = {}
        else:
            coordinate_func = dict(coordinate_func)
        for c in self.obj.coords:
            coordinate_func.setdefault(c, np.mean)
        self.coordinate_func = coordinate_func

    def __repr__(self):
        """provide a nice str repr of our coarsen object"""

        attrs = ["{k}->{v}".format(k=k, v=getattr(self, k))
                 for k in self._attributes
                 if getattr(self, k, None) is not None]
        return "{klass} [{attrs}]".format(klass=self.__class__.__name__,
                                          attrs=','.join(attrs))
|
||
|
||
class DataArrayCoarsen(Coarsen):
    @classmethod
    def _reduce_method(cls, func):
        """
        Return a wrapped function for injecting numpy and bottleneck methods.
        see ops.inject_coarsen_methods

        The returned function coarsens the wrapped DataArray with ``func``
        and every coordinate that spans a coarsened dimension with its
        entry in ``self.coordinate_func``.
        """
        def wrapped_func(self, **kwargs):
            # NOTE(review): ``kwargs`` is accepted but never used below.
            from .dataarray import DataArray

            def _coarsen(variable, reducer):
                # Shared call: same windows/side/trim settings everywhere.
                return variable.coarsen(
                    self.windows, reducer, self.side, self.trim_excess)

            reduced = _coarsen(self.obj.variable, func)

            coords = {}
            for name, coord in self.obj.coords.items():
                if name == self.obj.name:
                    # The coordinate named like the array reuses the result.
                    coords[name] = reduced
                elif any(d in self.windows for d in coord.dims):
                    coords[name] = _coarsen(
                        coord.variable, self.coordinate_func[name])
                else:
                    # Untouched by coarsening: keep the coordinate as it is.
                    coords[name] = coord
            return DataArray(reduced, dims=self.obj.dims, coords=coords)

        return wrapped_func
|
||
|
||
class DatasetCoarsen(Coarsen):
    @classmethod
    def _reduce_method(cls, func):
        """
        Return a wrapped function for injecting numpy and bottleneck methods.
        see ops.inject_coarsen_methods

        The returned function coarsens every data variable with ``func``
        and every coordinate that spans a coarsened dimension with its
        entry in ``self.coordinate_func``.
        """
        def wrapped_func(self, **kwargs):
            # NOTE(review): ``kwargs`` is accepted but never used below.
            from .dataset import Dataset

            def _coarsen(variable, reducer):
                # Shared call: same windows/side/trim settings everywhere.
                return variable.coarsen(
                    self.windows, reducer, self.side, self.trim_excess)

            reduced = OrderedDict(
                (key, _coarsen(da.variable, func))
                for key, da in self.obj.data_vars.items())

            coords = {}
            for name, coord in self.obj.coords.items():
                touched = any(d in self.windows for d in coord.dims)
                coords[name] = (
                    _coarsen(coord.variable, self.coordinate_func[name])
                    if touched else coord.variable)
            return Dataset(reduced, coords=coords)

        return wrapped_func
|
||
|
||
|
||
inject_bottleneck_rolling_methods(DataArrayRolling) | ||
inject_datasetrolling_methods(DatasetRolling) | ||
inject_coarsen_methods(DataArrayCoarsen) | ||
inject_coarsen_methods(DatasetCoarsen) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1632,6 +1632,73 @@ def rolling_window(self, dim, window, window_dim, center=False, | |
array, axis=self.get_axis_num(dim), window=window, | ||
center=center, fill_value=fill_value)) | ||
|
||
def coarsen(self, windows, func, side='left', trim_excess=False):
    """
    Apply ``func`` over blocks of this variable.

    Entries of ``windows`` whose key is not one of ``self.dims`` are
    ignored. The data is reshaped by ``_coarsen_reshape`` so that each
    coarsened dimension gets a temporary companion dimension, and ``func``
    is called as ``func(reshaped, axis=...)`` over those companion axes.

    Returns a new object of the same type, built from ``self.dims``, the
    result of ``func`` and ``self._attrs``.
    """
    windows = {dim: size for dim, size in windows.items()
               if dim in self.dims}

    # One temporary dimension name per coarsened dimension.
    new_dimensions = {}
    for dim in windows:
        new_dimensions[dim] = utils.get_temp_dimname(self.dims, dim)

    reshaped = self._coarsen_reshape(
        windows, side, trim_excess, new_dimensions)

    axis = tuple(
        reshaped.get_axis_num(name) for name in new_dimensions.values())
    data = func(reshaped, axis=axis)
    return type(self)(self.dims, data, self._attrs)
|
||
def _coarsen_reshape(self, windows, side, trim_excess, coarsen_dimensions):
    """
    Construct a reshaped variable for coarsen.

    Parameters
    ----------
    windows : mapping from dimension name to window size
        Every window size must be > 0.
    side : str, or mapping from dimension name to str
        ``'left'`` trims or pads at the end of the dimension; any other
        value trims or pads at its start.
    trim_excess : boolean, or mapping from dimension name to boolean
        If true, entries that do not fill a complete window are dropped.
        Otherwise the dimension is padded with ``pad_with_fill_value`` up
        to the next multiple of the window size.
    coarsen_dimensions : mapping from dimension name to new dimension name
        Name of the extra dimension inserted right after each coarsened
        dimension; it has the length of the window.

    Returns
    -------
    Variable
        Each coarsened dimension ``d`` of size ``n * window`` is split
        into ``d`` (size ``n``) followed by ``coarsen_dimensions[d]``
        (size ``window``). If ``windows`` is empty, a new object of the
        same type with the same dims, data and attrs is returned.

    Raises
    ------
    ValueError
        If any window size is not positive.
    """
    if not utils.is_dict_like(side):
        side = {d: side for d in windows}

    if not utils.is_dict_like(trim_excess):
        trim_excess = {d: trim_excess for d in windows}

    # remove unrelated dimensions
    side = {k: v for k, v in side.items() if k in self.dims}
    trim_excess = {k: v for k, v in trim_excess.items() if k in self.dims}

    if not windows:
        return type(self)(self.dims, self.data, self._attrs)

    for window in windows.values():
        if window <= 0:
            raise ValueError('window must be > 0')

    variable = self
    for d, window in windows.items():
        # trim or pad the object
        size = variable.shape[self._get_axis_num(d)]
        # Integer arithmetic: number of complete windows and the leftover.
        n, excess = divmod(size, window)
        if trim_excess[d]:
            if side[d] == 'left':
                variable = variable.isel({d: slice(window * n)})
            else:
                variable = variable.isel({d: slice(excess, None)})
        else:  # pad
            # Pad only up to the next multiple of the window; this is 0
            # when the size already divides evenly, so no spurious extra
            # window of fill values is appended.
            pad = (window - excess) % window
            if side[d] == 'left':
                pad_widths = {d: (0, pad)}
            else:
                pad_widths = {d: (pad, 0)}
            # Called even for pad == 0 so that the result is produced the
            # same way whether or not padding was actually needed.
            variable = variable.pad_with_fill_value(pad_widths)

    shape = []
    dims = []
    for i, d in enumerate(variable.dims):
        if d in windows:
            size = variable.shape[i]
            shape.extend([size // windows[d], windows[d]])
            dims.extend([d, coarsen_dimensions[d]])
        else:
            shape.append(variable.shape[i])
            dims.append(d)

    return Variable(dims, variable.data.reshape(shape), self._attrs)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it worth making an actual xarray.Variable object here rather than just returning |
||
|
||
@property
def real(self):
    """Real part of the data, wrapped in a new object of the same type.

    The new object is built from ``self.dims``, ``self.data.real`` and
    ``self._attrs``.
    """
    cls = type(self)
    real_part = self.data.real
    return cls(self.dims, real_part, self._attrs)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be nice to have an API that lets us express at least three options (trim the excess, pad, or require an exact fit).
Maybe a string-valued keyword argument would work better here, e.g., `boundary='trim'`, `boundary='pad'` and `boundary='exact'`?
I would also suggest putting this argument before `side`, since `side` is only used for a particular (non-default) value of this argument.