Use getitem_with_mask in reindex_variables #1847
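The diff below contains only benchmarks and a dtype helper, so as context for the title: the `getitem_with_mask` approach, as I read the PR, gathers values with an integer indexer in which `-1` marks labels that are missing from the old index, then fills those slots afterwards. The following NumPy-only sketch illustrates that idea; it is not xarray's implementation, and `masked_take` and its signature are hypothetical.

```python
import numpy as np

def masked_take(values, indexer, fill_value=np.nan):
    """Hypothetical sketch of masked gathering: `indexer` holds positions
    into `values`, with -1 marking labels that had no match."""
    # promote the dtype so that fill_value (NaN here) is representable
    out = values.astype(np.result_type(values, fill_value))[indexer]
    out[indexer == -1] = fill_value  # overwrite the sentinel slots
    return out

values = np.array([10, 20, 30])
indexer = np.array([0, -1, 2])   # middle label missing from the old index
print(masked_take(values, indexer))  # [10. nan 30.]
```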
The asv benchmark configuration drops the no-dask environment (`null`), so all benchmarks now run with dask installed:

```diff
@@ -63,7 +63,7 @@
     "netcdf4": [""],
     "scipy": [""],
     "bottleneck": ["", null],
-    "dask": ["", null],
+    "dask": [""],
 },
```
A new benchmark module is added for `reindex` (new file, 45 lines):

```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import xarray as xr

from . import requires_dask


class Reindex(object):
    def setup(self):
        data = np.random.RandomState(0).randn(1000, 100, 100)
        self.ds = xr.Dataset({'temperature': (('time', 'x', 'y'), data)},
                             coords={'time': np.arange(1000),
                                     'x': np.arange(100),
                                     'y': np.arange(100)})

    def time_1d_coarse(self):
        self.ds.reindex(time=np.arange(0, 1000, 5)).load()

    def time_1d_fine_all_found(self):
        self.ds.reindex(time=np.arange(0, 1000, 0.5), method='nearest').load()

    def time_1d_fine_some_missing(self):
        self.ds.reindex(time=np.arange(0, 1000, 0.5), method='nearest',
                        tolerance=0.1).load()

    def time_2d_coarse(self):
        self.ds.reindex(x=np.arange(0, 100, 2), y=np.arange(0, 100, 2)).load()

    def time_2d_fine_all_found(self):
        self.ds.reindex(x=np.arange(0, 100, 0.5), y=np.arange(0, 100, 0.5),
                        method='nearest').load()

    def time_2d_fine_some_missing(self):
        self.ds.reindex(x=np.arange(0, 100, 0.5), y=np.arange(0, 100, 0.5),
                        method='nearest', tolerance=0.1).load()


class ReindexDask(Reindex):
    def setup(self):
        requires_dask()
        super(ReindexDask, self).setup()
        self.ds = self.ds.chunk({'time': 100})
```

> **Review comment:** Do we have unit test coverage for this test w/ dask? I see it was failing in the ASV benchmark before the changes in …
>
> **Reply:** I'll double check, but I think the test failure was just ASV timing out. When I ran a similar test case before (see my comment on the other PR), it was extremely slow.
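For a sense of what the `*_some_missing` cases exercise: with `method='nearest'` and a `tolerance`, new labels whose nearest match is farther away than the tolerance are treated as missing and filled with NaN. A small illustrative example using the public `reindex` API (the dataset and values here are chosen purely for demonstration):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({'t': ('time', np.arange(5.0))},
                coords={'time': np.arange(5)})
# new labels 0.0, 0.5, 1.0, ...: only the integer labels fall within
# 0.1 of an existing time value, so every other slot becomes NaN
out = ds.reindex(time=np.arange(0, 5, 0.5), method='nearest', tolerance=0.1)
print(out['t'].values)  # [ 0. nan  1. nan  2. nan  3. nan  4. nan]
```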
A `result_type` helper is added to `xarray.core.dtypes`, after the existing `is_datetime_like`:

```diff
@@ -60,3 +60,26 @@ def is_datetime_like(dtype):
     """
     return (np.issubdtype(dtype, np.datetime64) or
             np.issubdtype(dtype, np.timedelta64))
+
+
+def result_type(*arrays_and_dtypes):
+    """Like np.result_type, but number + string -> object (not string).
+
+    Parameters
+    ----------
+    *arrays_and_dtypes : list of arrays and dtypes
+        The dtype is extracted from both numpy and dask arrays.
+
+    Returns
+    -------
+    numpy.dtype for the result.
+    """
+    types = [np.result_type(t).type for t in arrays_and_dtypes]
+
+    # For reference, see the NumPy type hierarchy:
+    # https://docs.scipy.org/doc/numpy-1.13.0/reference/arrays.scalars.html
+    if (any(issubclass(t, (np.number, np.bool_)) for t in types) and
+            any(issubclass(t, np.character) for t in types)):
+        return np.dtype(object)
+    else:
+        return np.result_type(*arrays_and_dtypes)
```

> **Review comment:** I think we should also take care of the bool-float pair:
>
> ```python
> In [1]: import numpy as np
>    ...: import xarray as xr
>    ...:
>    ...: x = xr.DataArray([True, False])
>    ...: x.where(x)
>    ...:
> Out[1]:
> <xarray.DataArray (dim_0: 2)>
> array([ 1., nan])
> Dimensions without coordinates: dim_0
> ```
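As a quick illustration of the helper (a sketch; the NumPy result shown assumes the 1.x-era promotion rules referenced in the docstring, under which number + string promotes to a string dtype):

```python
import numpy as np
from xarray.core import dtypes

# NumPy 1.x promotes number + string to a fixed-width string dtype...
np.result_type(np.unicode_, np.int64)       # e.g. dtype('<U21')
# ...whereas the helper falls back to object for such mixtures:
dtypes.result_type(np.unicode_, np.int64)   # dtype('O')
# purely numeric inputs defer to NumPy unchanged:
dtypes.result_type(np.float32, np.float64)  # dtype('float64')
```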
A new test module covers the helper (new file, 47 lines):

```python
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pytest

from xarray.core import dtypes


@pytest.mark.parametrize("args, expected", [
    ([np.bool], np.bool),
    ([np.bool, np.string_], np.object),
    ([np.float32, np.float64], np.float64),
    ([np.float32, np.string_], np.object),
    ([np.unicode_, np.int64], np.object),
    ([np.unicode_, np.unicode_], np.unicode_),
])
def test_result_type(args, expected):
    actual = dtypes.result_type(*args)
    assert actual == expected


def test_result_type_scalar():
    actual = dtypes.result_type(np.arange(3, dtype=np.float32), np.nan)
    assert actual == np.float32


def test_result_type_dask_array():
    # verify it works without evaluating dask arrays
    da = pytest.importorskip('dask.array')
    dask = pytest.importorskip('dask')

    def error():
        raise RuntimeError

    array = da.from_delayed(dask.delayed(error)(), (), np.float64)
    with pytest.raises(RuntimeError):
        array.compute()

    actual = dtypes.result_type(array)
    assert actual == np.float64

    # note that this differs from the behavior for scalar numpy arrays, which
    # would get promoted to float32
    actual = dtypes.result_type(array, np.array([0.5, 1.0], dtype=np.float32))
    assert actual == np.float64
```
> **Review comment:** I don't think we need to run benchmarks separately with/without dask. This just makes the whole suite take twice as long as necessary. We already need to write separate benchmark cases, given that the syntax for using dask is different.