From 1d00df3841f2a3a828d7a3ff14256dab79619557 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Mon, 13 Jul 2020 16:02:18 -0500 Subject: [PATCH 01/17] Change isinstance checks to duck Dask Array checks #4208 --- xarray/backends/common.py | 4 ++-- xarray/coding/strings.py | 6 +++--- xarray/coding/variables.py | 6 +++--- xarray/conventions.py | 6 +++--- xarray/convert.py | 4 ++-- xarray/core/accessor_dt.py | 8 ++++---- xarray/core/common.py | 6 +++--- xarray/core/computation.py | 10 +++++----- xarray/core/dask_array_compat.py | 7 ++++++- xarray/core/dataset.py | 8 ++++---- xarray/core/duck_array_ops.py | 26 +++++++++++++------------- xarray/core/formatting.py | 11 +++++------ xarray/core/indexing.py | 7 ++++--- xarray/core/missing.py | 5 +++-- xarray/core/nanops.py | 12 ++++++------ xarray/core/rolling.py | 10 ++++------ xarray/core/rolling_exp.py | 4 ++-- xarray/core/variable.py | 23 ++++++++++------------- 18 files changed, 82 insertions(+), 81 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 63c4c956f86..b5b615c988d 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -8,7 +8,7 @@ from ..conventions import cf_encoder from ..core import indexing -from ..core.pycompat import dask_array_type +from ..core.dask_array_compat import is_duck_dask_array from ..core.utils import FrozenDict, NdimSizeLenMixin # Create a logger object, but don't add any handlers. Leave that to user code. @@ -177,7 +177,7 @@ def __init__(self, lock=None): self.lock = lock def add(self, source, target, region=None): - if isinstance(source, dask_array_type): + if is_duck_dask_array(source): self.sources.append(source) self.targets.append(target) self.regions.append(region) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 35cc190ffe3..706536d4188 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -4,7 +4,7 @@ import numpy as np from ..core import indexing -from ..core.pycompat import dask_array_type +from ..core.dask_array_compat import is_duck_dask_array from ..core.variable import Variable from .variables import ( VariableCoder, @@ -130,7 +130,7 @@ def bytes_to_char(arr): if arr.dtype.kind != "S": raise ValueError("argument must have a fixed-width bytes dtype") - if isinstance(arr, dask_array_type): + if is_duck_dask_array(arr): import dask.array as da return da.map_blocks( @@ -167,7 +167,7 @@ def char_to_bytes(arr): # can't make an S0 dtype return np.zeros(arr.shape[:-1], dtype=np.string_) - if isinstance(arr, dask_array_type): + if is_duck_dask_array(arr): import dask.array as da if len(arr.chunks[-1]) > 1: diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 28ead397461..06f7be26a45 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -7,7 +7,7 @@ import pandas as pd from ..core import dtypes, duck_array_ops, indexing -from ..core.pycompat import dask_array_type +from ..core.dask_array_compat import is_duck_dask_array from ..core.variable import Variable @@ -56,7 +56,7 @@ class _ElementwiseFunctionArray(indexing.ExplicitlyIndexedNDArrayMixin): """ def __init__(self, array, func, dtype): - assert not isinstance(array, dask_array_type) + assert not is_duck_dask_array(array) self.array = indexing.as_indexable(array) self.func = func self._dtype = dtype @@ -93,7 +93,7 @@ def lazy_elemwise_func(array, func, dtype): ------- Either a dask.array.Array or _ElementwiseFunctionArray. """ - if isinstance(array, dask_array_type): + if is_duck_dask_array(array): return array.map_blocks(func, dtype=dtype) else: return _ElementwiseFunctionArray(array, func, dtype) diff --git a/xarray/conventions.py b/xarray/conventions.py index fc0572944f3..a62d7e6bc80 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -8,7 +8,7 @@ from .coding.variables import SerializationWarning, pop_to from .core import duck_array_ops, indexing from .core.common import contains_cftime_datetimes -from .core.pycompat import dask_array_type +from .core.dask_array_compat import is_duck_dask_array from .core.variable import IndexVariable, Variable, as_variable @@ -178,7 +178,7 @@ def ensure_dtype_not_object(var, name=None): if var.dtype.kind == "O": dims, data, attrs, encoding = _var_as_tuple(var) - if isinstance(data, dask_array_type): + if is_duck_dask_array(data): warnings.warn( "variable {} has data in the form of a dask array with " "dtype=object, which means it is being loaded into memory " @@ -351,7 +351,7 @@ def decode_cf_variable( del attributes["dtype"] data = BoolTypeArray(data) - if not isinstance(data, dask_array_type): + if not is_duck_dask_array(data): data = indexing.LazilyOuterIndexedArray(data) return Variable(dimensions, data, attributes, encoding=encoding) diff --git a/xarray/convert.py b/xarray/convert.py index 0c86b090f34..6b05ce14c37 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -254,7 +254,7 @@ def from_iris(cube): """ Convert a Iris cube into an DataArray """ import iris.exceptions - from xarray.core.pycompat import dask_array_type + from xarray.core.dask_array_compat import is_duck_dask_array name = _name(cube) if name == "unknown": @@ -290,7 +290,7 @@ def from_iris(cube): cube_data = cube.core_data() if hasattr(cube, "core_data") else cube.data # Deal with dask and numpy masked arrays - if isinstance(cube_data, dask_array_type): + if is_duck_dask_array(cube_data): from dask.array import ma as dask_ma filled_data = dask_ma.filled(cube_data, get_fill_value(cube.dtype)) diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 630aaee142f..802fd8dfa3b 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -6,7 +6,7 @@ is_np_datetime_like, is_np_timedelta_like, ) -from .pycompat import dask_array_type +from .dask_array_compat import is_duck_dask_array def _season_from_months(months): @@ -70,7 +70,7 @@ def _get_date_field(values, name, dtype): else: access_method = _access_through_cftimeindex - if isinstance(values, dask_array_type): + if is_duck_dask_array(values): from dask.array import map_blocks return map_blocks(access_method, values, name, dtype=dtype) @@ -114,7 +114,7 @@ def _round_field(values, name, freq): Array-like of datetime fields accessed for each element in values """ - if isinstance(values, dask_array_type): + if is_duck_dask_array(values): from dask.array import map_blocks dtype = np.datetime64 if is_np_datetime_like(values.dtype) else np.dtype("O") @@ -151,7 +151,7 @@ def _strftime(values, date_format): access_method = _strftime_through_series else: access_method = _strftime_through_cftimeindex - if isinstance(values, dask_array_type): + if is_duck_dask_array(values): from dask.array import map_blocks return map_blocks(access_method, values, date_format) diff --git a/xarray/core/common.py b/xarray/core/common.py index 67dc0fda461..0993e2311dd 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -23,7 +23,7 @@ from .arithmetic import SupportsArithmetic from .npcompat import DTypeLike from .options import OPTIONS, _get_keep_attrs -from .pycompat import dask_array_type +from .dask_array_compat import is_duck_dask_array from .rolling_exp import RollingExp from .utils import Frozen, either_dict_or_kwargs, is_scalar @@ -1425,7 +1425,7 @@ def _full_like_variable(other, fill_value, dtype: DTypeLike = None): """ from .variable import Variable - if isinstance(other.data, dask_array_type): + if is_duck_dask_array(other.data): import dask.array if dtype is None: @@ -1573,7 +1573,7 @@ def _contains_cftime_datetimes(array) -> bool: else: if array.dtype == np.dtype("O") and array.size > 0: sample = array.ravel()[0] - if isinstance(sample, dask_array_type): + if is_duck_dask_array(sample): sample = sample.compute() if isinstance(sample, np.ndarray): sample = sample.item() diff --git a/xarray/core/computation.py b/xarray/core/computation.py index d8a0c53e817..a7ae045cae5 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -27,7 +27,7 @@ from .alignment import align, deep_align from .merge import merge_coordinates_without_align from .options import OPTIONS -from .pycompat import dask_array_type +from .dask_array_compat import is_duck_dask_array from .utils import is_dict_like from .variable import Variable @@ -569,7 +569,7 @@ def apply_variable_ufunc( for arg, core_dims in zip(args, signature.input_core_dims) ] - if any(isinstance(array, dask_array_type) for array in input_data): + if any(is_duck_dask_array(array) for array in input_data): if dask == "forbidden": raise ValueError( "apply_ufunc encountered a dask array on an " @@ -698,7 +698,7 @@ def _apply_blockwise( ) for n, (data, core_dims) in enumerate(zip(args, signature.input_core_dims)): - if isinstance(data, dask_array_type): + if is_duck_dask_array(data): # core dimensions cannot span multiple chunks for axis, dim in enumerate(core_dims, start=-len(core_dims)): if len(data.chunks[axis]) != 1: @@ -735,7 +735,7 @@ def _apply_blockwise( def apply_array_ufunc(func, *args, dask="forbidden"): """Apply a ndarray level function over ndarray objects.""" - if any(isinstance(arg, dask_array_type) for arg in args): + if any(is_duck_dask_array(arg) for arg in args): if dask == "forbidden": raise ValueError( "apply_ufunc encountered a dask array on an " @@ -1574,7 +1574,7 @@ def _calc_idxminmax( indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna) # Handle dask arrays. - if isinstance(array.data, dask_array_type): + if is_duck_dask_array(array.data): import dask.array chunks = dict(zip(array.dims, array.chunks)) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 94c50d90e84..6645be26f2b 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -5,6 +5,7 @@ import numpy as np from .pycompat import dask_array_type +from .utils import is_array_like try: import dask.array as da @@ -39,7 +40,7 @@ def meta_from_array(x, ndim=None, dtype=None): """ # If using x._meta, x must be a Dask Array, some libraries (e.g. zarr) # implement a _meta attribute that are incompatible with Dask Array._meta - if hasattr(x, "_meta") and isinstance(x, dask_array_type): + if hasattr(x, "_meta") and is_duck_dask_array(x): x = x._meta if dtype is None and x is None: @@ -130,6 +131,10 @@ def _validate_pad_output_shape(input_shape, pad_width, output_shape): ) +def is_duck_dask_array(x): + return is_array_like(x) and isinstance(x, dask_array_type) + + def pad(array, pad_width, mode="constant", **kwargs): padded = da.pad(array, pad_width, mode=mode, **kwargs) # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5bfddaa710b..f09bcd984b4 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -80,7 +80,7 @@ ) from .missing import get_clean_interp_index from .options import OPTIONS, _get_keep_attrs -from .pycompat import dask_array_type +from .dask_array_compat import is_duck_dask_array from .utils import ( Default, Frozen, @@ -649,7 +649,7 @@ def load(self, **kwargs) -> "Dataset": lazy_data = { k: v._data for k, v in self.variables.items() - if isinstance(v._data, dask_array_type) + if is_duck_dask_array(v._data) } if lazy_data: import dask.array as da @@ -820,7 +820,7 @@ def _persist_inplace(self, **kwargs) -> "Dataset": lazy_data = { k: v._data for k, v in self.variables.items() - if isinstance(v._data, dask_array_type) + if is_duck_dask_array(v._data) } if lazy_data: import dask @@ -5963,7 +5963,7 @@ def polyfit( continue if skipna is None: - if isinstance(da.data, dask_array_type): + if is_duck_dask_array(da.data): skipna_da = True else: skipna_da = np.any(da.isnull()) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index df579d23544..c58c29db1f2 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -14,7 +14,7 @@ from . import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast -from .pycompat import dask_array_type +from .dask_array_compat import is_duck_dask_array try: import dask.array as dask_array @@ -46,7 +46,7 @@ def f(*args, **kwargs): dispatch_args = args[0] else: dispatch_args = args[array_args] - if any(isinstance(a, dask_array_type) for a in dispatch_args): + if any(is_duck_dask_array(a) for a in dispatch_args): try: wrapped = getattr(dask_module, name) except AttributeError as e: @@ -64,7 +64,7 @@ def f(*args, **kwargs): def fail_on_dask_array_input(values, msg=None, func_name=None): - if isinstance(values, dask_array_type): + if is_duck_dask_array(values): if msg is None: msg = "%r is not yet a valid method on dask arrays" if func_name is None: @@ -104,7 +104,7 @@ def isnull(data): return zeros_like(data, dtype=bool) else: # at this point, array should have dtype=object - if isinstance(data, (np.ndarray, dask_array_type)): + if is_duck_dask_array(data) or isinstance(data, np.ndarray): return pandas_isnull(data) else: # Not reachable yet, but intended for use with other duck array @@ -136,7 +136,7 @@ def notnull(data): def gradient(x, coord, axis, edge_order): - if isinstance(x, dask_array_type): + if is_duck_dask_array(x): return dask_array.gradient(x, coord, axis=axis, edge_order=edge_order) return np.gradient(x, coord, axis=axis, edge_order=edge_order) @@ -161,7 +161,7 @@ def trapz(y, x, axis): def asarray(data): return ( data - if (isinstance(data, dask_array_type) or hasattr(data, "__array_function__")) + if (is_duck_dask_array(data) or hasattr(data, "__array_function__")) else np.asarray(data) ) @@ -192,8 +192,8 @@ def lazy_array_equiv(arr1, arr2): return False if ( dask_array - and isinstance(arr1, dask_array_type) - and isinstance(arr2, dask_array_type) + and is_duck_dask_array(arr1) + and is_duck_dask_array(arr2) ): # GH3068 if arr1.name == arr2.name: @@ -216,7 +216,7 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8): dask_version is not None and LooseVersion(dask_version) >= "2.9.1" ) if not sufficient_dask_version and any( - isinstance(arr, dask_array_type) for arr in [arr1, arr2] + is_duck_dask_array(arr) for arr in [arr1, arr2] ): arr1 = np.array(arr1) arr2 = np.array(arr2) @@ -323,7 +323,7 @@ def f(values, axis=None, skipna=None, **kwargs): try: return func(values, axis=axis, **kwargs) except AttributeError: - if not isinstance(values, dask_array_type): + if not is_duck_dask_array(values): raise try: # dask/dask#3133 dask sometimes needs dtype argument # if func does not accept dtype, then raises TypeError @@ -539,7 +539,7 @@ def mean(array, axis=None, skipna=None, **kwargs): + offset ) elif _contains_cftime_datetimes(array): - if isinstance(array, dask_array_type): + if is_duck_dask_array(array): raise NotImplementedError( "Computing the mean of an array containing " "cftime.datetime objects is not yet implemented on " @@ -610,7 +610,7 @@ def rolling_window(array, axis, window, center, fill_value): Make an ndarray with a rolling window of axis-th dimension. The rolling dimension will be placed at the last dimension. """ - if isinstance(array, dask_array_type): + if is_duck_dask_array(array): return dask_array_ops.rolling_window(array, axis, window, center, fill_value) else: # np.ndarray return nputils.rolling_window(array, axis, window, center, fill_value) @@ -619,7 +619,7 @@ def rolling_window(array, axis, window, center, fill_value): def least_squares(lhs, rhs, rcond=None, skipna=False): """Return the coefficients and residuals of a least-squares fit. """ - if isinstance(rhs, dask_array_type): + if is_duck_dask_array(rhs): return dask_array_ops.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) else: return nputils.least_squares(lhs, rhs, rcond=rcond, skipna=skipna) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 28eaae5f05b..94897679f86 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -12,7 +12,8 @@ from .duck_array_ops import array_equiv from .options import OPTIONS -from .pycompat import dask_array_type, sparse_array_type +from .pycompat import sparse_array_type +from .dask_array_compat import is_duck_dask_array def pretty_print(x, numchars: int): @@ -228,7 +229,7 @@ def inline_dask_repr(array): redundant information that's already printed by the repr function of the xarray wrapper. """ - assert isinstance(array, dask_array_type), array + assert is_duck_dask_array(array), array chunksize = tuple(c[0] for c in array.chunks) @@ -257,7 +258,7 @@ def inline_variable_array_repr(var, max_width): """Build a one-line summary of a variable's data.""" if var._in_memory: return format_array_flat(var, max_width) - elif isinstance(var._data, dask_array_type): + elif is_duck_dask_array(var._data): return inline_dask_repr(var.data) elif isinstance(var._data, sparse_array_type): return inline_sparse_repr(var.data) @@ -455,9 +456,7 @@ def short_data_repr(array): internal_data = getattr(array, "variable", array)._data if isinstance(array, np.ndarray): return short_numpy_repr(array) - elif hasattr(internal_data, "__array_function__") or isinstance( - internal_data, dask_array_type - ): + elif hasattr(internal_data, "__array_function__") or is_duck_dask_array(internal_data): return limit_lines(repr(array.data), limit=40) elif array._in_memory or array.size < 1e5: return short_numpy_repr(array) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index ab049a0a4b4..dbb7c32cf51 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -11,7 +11,8 @@ from . import duck_array_ops, nputils, utils from .npcompat import DTypeLike -from .pycompat import dask_array_type, integer_types, sparse_array_type +from .pycompat import integer_types, sparse_array_type +from .dask_array_compat import is_duck_dask_array from .utils import is_dict_like, maybe_cast_to_coords_dtype @@ -699,7 +700,7 @@ def as_indexable(array): return NumpyIndexingAdapter(array) if isinstance(array, pd.Index): return PandasIndexAdapter(array) - if isinstance(array, dask_array_type): + if is_duck_dask_array(array): return DaskIndexingAdapter(array) if hasattr(array, "__array_function__"): return NdArrayLikeIndexingAdapter(array) @@ -1111,7 +1112,7 @@ def _masked_result_drop_slice(key, data=None): new_keys = [] for k in key: if isinstance(k, np.ndarray): - if isinstance(data, dask_array_type): + if is_duck_dask_array(data): new_keys.append(_dask_array_with_chunks_hint(k, chunks_hint)) elif isinstance(data, sparse_array_type): import sparse diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 59d4f777c73..7c487e0ace6 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -10,7 +10,8 @@ from . import utils from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc -from .duck_array_ops import dask_array_type, datetime_to_numeric, timedelta_to_numeric +from .duck_array_ops import datetime_to_numeric, timedelta_to_numeric +from .dask_array_compat import is_duck_dask_array from .options import _get_keep_attrs from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -703,7 +704,7 @@ def interp_func(var, x, new_x, method, kwargs): else: func, kwargs = _get_interpolator_nd(method, **kwargs) - if isinstance(var, dask_array_type): + if is_duck_dask_array(var): import dask.array as da _assert_single_chunk(var, range(var.ndim - len(x), var.ndim)) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index f9989c2c8c9..6bb33676c4b 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -2,7 +2,7 @@ from . import dtypes, nputils, utils from .duck_array_ops import _dask_or_eager_func, count, fillna, isnull, where_method -from .pycompat import dask_array_type +from .dask_array_compat import is_duck_dask_array try: import dask.array as dask_array @@ -77,7 +77,7 @@ def nanmin(a, axis=None, out=None): if a.dtype.kind == "O": return _nan_minmax_object("min", dtypes.get_pos_infinity(a.dtype), a, axis) - module = dask_array if isinstance(a, dask_array_type) else nputils + module = dask_array if is_duck_dask_array(a) else nputils return module.nanmin(a, axis=axis) @@ -85,7 +85,7 @@ def nanmax(a, axis=None, out=None): if a.dtype.kind == "O": return _nan_minmax_object("max", dtypes.get_neg_infinity(a.dtype), a, axis) - module = dask_array if isinstance(a, dask_array_type) else nputils + module = dask_array if is_duck_dask_array(a) else nputils return module.nanmax(a, axis=axis) @@ -94,7 +94,7 @@ def nanargmin(a, axis=None): fill_value = dtypes.get_pos_infinity(a.dtype) return _nan_argminmax_object("argmin", fill_value, a, axis=axis) - module = dask_array if isinstance(a, dask_array_type) else nputils + module = dask_array if is_duck_dask_array(a) else nputils return module.nanargmin(a, axis=axis) @@ -103,7 +103,7 @@ def nanargmax(a, axis=None): fill_value = dtypes.get_neg_infinity(a.dtype) return _nan_argminmax_object("argmax", fill_value, a, axis=axis) - module = dask_array if isinstance(a, dask_array_type) else nputils + module = dask_array if is_duck_dask_array(a) else nputils return module.nanargmax(a, axis=axis) @@ -136,7 +136,7 @@ def nanmean(a, axis=None, dtype=None, out=None): if a.dtype.kind == "O": return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype) - if isinstance(a, dask_array_type): + if is_duck_dask_array(a): return dask_array.nanmean(a, axis=axis, dtype=dtype) return np.nanmean(a, axis=axis, dtype=dtype) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index ecba5307680..009bc19c664 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -8,7 +8,7 @@ from .dask_array_ops import dask_rolling_wrapper from .ops import inject_reduce_methods from .options import _get_keep_attrs -from .pycompat import dask_array_type +from .dask_array_compat import is_duck_dask_array try: import bottleneck @@ -338,7 +338,7 @@ def _bottleneck_reduce(self, func, **kwargs): padded = self.obj.variable if self.center: - if isinstance(padded.data, dask_array_type): + if is_duck_dask_array(padded.data): # Workaround to make the padded chunk size is larger than # self.window-1 shift = -(self.window + 1) // 2 @@ -351,7 +351,7 @@ def _bottleneck_reduce(self, func, **kwargs): valid = (slice(None),) * axis + (slice(-shift, None),) padded = padded.pad({self.dim: (0, -shift)}, mode="constant") - if isinstance(padded.data, dask_array_type): + if is_duck_dask_array(padded.data): raise AssertionError("should not be reachable") values = dask_rolling_wrapper( func, padded.data, window=self.window, min_count=min_count, axis=axis @@ -378,9 +378,7 @@ def _numpy_or_bottleneck_reduce( ) del kwargs["dim"] - if bottleneck_move_func is not None and not isinstance( - self.obj.data, dask_array_type - ): + if bottleneck_move_func is not None and not is_duck_dask_array(self.obj.data): # TODO: renable bottleneck with dask after the issues # underlying https://github.com/pydata/xarray/issues/2940 are # fixed. diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 6ef63e42291..133ff615edf 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -1,7 +1,7 @@ import numpy as np from .pdcompat import count_not_none -from .pycompat import dask_array_type +from .dask_array_compat import is_duck_dask_array def _get_alpha(com=None, span=None, halflife=None, alpha=None): @@ -13,7 +13,7 @@ def _get_alpha(com=None, span=None, halflife=None, alpha=None): def move_exp_nanmean(array, *, axis, alpha): - if isinstance(array, dask_array_type): + if is_duck_dask_array(array): raise TypeError("rolling_exp is not currently support for dask arrays") import numbagg diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c505c749557..8db3900a6ca 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -34,6 +34,7 @@ from .npcompat import IS_NEP18_ACTIVE from .options import _get_keep_attrs from .pycompat import dask_array_type, integer_types +from .dask_array_compat import is_duck_dask_array from .utils import ( OrderedSet, _default, @@ -341,9 +342,7 @@ def _in_memory(self): @property def data(self): - if hasattr(self._data, "__array_function__") or isinstance( - self._data, dask_array_type - ): + if hasattr(self._data, "__array_function__") or is_duck_dask_array(self._data): return self._data else: return self.values @@ -375,7 +374,7 @@ def load(self, **kwargs): -------- dask.array.compute """ - if isinstance(self._data, dask_array_type): + if is_duck_dask_array(self._data): self._data = as_compatible_data(self._data.compute(**kwargs)) elif not hasattr(self._data, "__array_function__"): self._data = np.asarray(self._data) @@ -410,7 +409,7 @@ def __dask_tokenize__(self): return normalize_token((type(self), self._dims, self.data, self._attrs)) def __dask_graph__(self): - if isinstance(self._data, dask_array_type): + if is_duck_dask_array(self._data): return self._data.__dask_graph__() else: return None @@ -738,7 +737,7 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA): dims, indexer, new_order = self._broadcast_indexes(key) if self.size: - if isinstance(self._data, dask_array_type): + if is_duck_dask_array(self._data): # dask's indexing is faster this way; also vindex does not # support negative indices yet: # https://github.com/dask/dask/pull/2967 @@ -885,9 +884,7 @@ def copy(self, deep=True, data=None): data = indexing.MemoryCachedArray(data.array) if deep: - if hasattr(data, "__array_function__") or isinstance( - data, dask_array_type - ): + if hasattr(data, "__array_function__") or is_duck_dask_array(data): data = data.copy() elif not isinstance(data, PandasIndexAdapter): # pandas.Index is immutable @@ -977,7 +974,7 @@ def chunk(self, chunks=None, name=None, lock=False): chunks = self.chunks or self.shape data = self._data - if isinstance(data, da.Array): + if is_duck_dask_array(data): data = data.rechunk(chunks) else: if isinstance(data, indexing.ExplicitlyIndexed): @@ -1124,7 +1121,7 @@ def _shift_one_dim(self, dim, count, fill_value=dtypes.NA): constant_values=fill_value, ) - if isinstance(data, dask_array_type): + if is_duck_dask_array(data): # chunked data should come out with the same chunks; this makes # it feasible to combine shifted and unshifted data # TODO: remove this once dask.array automatically aligns chunks @@ -1282,7 +1279,7 @@ def _roll_one_dim(self, dim, count): data = duck_array_ops.concatenate(arrays, axis) - if isinstance(data, dask_array_type): + if is_duck_dask_array(data): # chunked data should come out with the same chunks; this makes # it feasible to combine shifted and unshifted data # TODO: remove this once dask.array automatically aligns chunks @@ -1853,7 +1850,7 @@ def rank(self, dim, pct=False): data = self.data - if isinstance(data, dask_array_type): + if is_duck_dask_array(data): raise TypeError( "rank does not work for arrays stored as dask " "arrays. Load the data via .compute() or .load() " From 1ec28ec799a5eda02ac7dc65f9438036dd9a9c66 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Tue, 14 Jul 2020 10:23:12 -0500 Subject: [PATCH 02/17] Use is_dask_collection in is_duck_dask_array --- xarray/core/dask_array_compat.py | 5 +++-- xarray/core/formatting.py | 6 +++--- xarray/core/rolling_exp.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 6645be26f2b..587e608a07a 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -4,15 +4,16 @@ import numpy as np -from .pycompat import dask_array_type from .utils import is_array_like try: import dask.array as da from dask import __version__ as dask_version + from dask.base import is_dask_collection except ImportError: dask_version = "0.0.0" da = None + is_dask_collection = lambda _: False if LooseVersion(dask_version) >= LooseVersion("2.0.0"): meta_from_array = da.utils.meta_from_array @@ -132,7 +133,7 @@ def _validate_pad_output_shape(input_shape, pad_width, output_shape): def is_duck_dask_array(x): - return is_array_like(x) and isinstance(x, dask_array_type) + return is_array_like(x) and is_dask_collection(x) def pad(array, pad_width, mode="constant", **kwargs): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 94897679f86..e14f3eee9ff 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -12,7 +12,7 @@ from .duck_array_ops import array_equiv from .options import OPTIONS -from .pycompat import sparse_array_type +from .pycompat import dask_array_type =, sparse_array_type from .dask_array_compat import is_duck_dask_array @@ -229,7 +229,7 @@ def inline_dask_repr(array): redundant information that's already printed by the repr function of the xarray wrapper. """ - assert is_duck_dask_array(array), array + assert isinstance(array, dask_array_type), array chunksize = tuple(c[0] for c in array.chunks) @@ -258,7 +258,7 @@ def inline_variable_array_repr(var, max_width): """Build a one-line summary of a variable's data.""" if var._in_memory: return format_array_flat(var, max_width) - elif is_duck_dask_array(var._data): + elif isinstance(var._data, dask_array_type): return inline_dask_repr(var.data) elif isinstance(var._data, sparse_array_type): return inline_sparse_repr(var.data) diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 133ff615edf..49fcce81126 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -14,7 +14,7 @@ def _get_alpha(com=None, span=None, halflife=None, alpha=None): def move_exp_nanmean(array, *, axis, alpha): if is_duck_dask_array(array): - raise TypeError("rolling_exp is not currently support for dask arrays") + raise TypeError("rolling_exp is not currently support for dask-like arrays") import numbagg if axis == (): From 817c35d4ff2d223b95489bec08eb79528c5052a9 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Tue, 14 Jul 2020 10:31:43 -0500 Subject: [PATCH 03/17] Use is_dask_collection in is_duck_dask_array --- xarray/core/formatting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index e14f3eee9ff..2ff9d8880fb 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -12,7 +12,7 @@ from .duck_array_ops import array_equiv from .options import OPTIONS -from .pycompat import dask_array_type =, sparse_array_type +from .pycompat import dask_array_type, sparse_array_type from .dask_array_compat import is_duck_dask_array From bd770e159657a2d080176b6771e322c947aa5995 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Mon, 20 Jul 2020 12:24:37 -0500 Subject: [PATCH 04/17] Revert to isinstance checks according to review discussion --- xarray/convert.py | 2 +- xarray/core/dataset.py | 8 ++------ xarray/core/duck_array_ops.py | 9 +++------ xarray/core/formatting.py | 4 +++- xarray/core/indexing.py | 4 ++-- xarray/core/nanops.py | 12 ++++++------ 6 files changed, 17 insertions(+), 22 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 18726637911..727837fed40 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -254,7 +254,7 @@ def from_iris(cube): """ Convert a Iris cube into an DataArray """ import iris.exceptions - + from xarray.core.dask_array_compat import is_duck_dask_array name = _name(cube) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d24f2b0aea6..b0f915e2f8a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -647,9 +647,7 @@ def load(self, **kwargs) -> "Dataset": """ # access .data to coerce everything to numpy or dask arrays lazy_data = { - k: v._data - for k, v in self.variables.items() - if is_duck_dask_array(v._data) + k: v._data for k, v in self.variables.items() if is_duck_dask_array(v._data) } if lazy_data: import dask.array as da @@ -818,9 +816,7 @@ def _persist_inplace(self, **kwargs) -> "Dataset": """ # access .data to coerce everything to numpy or dask arrays lazy_data = { - k: v._data - for k, v in self.variables.items() - if is_duck_dask_array(v._data) + k: v._data for k, v in self.variables.items() if is_duck_dask_array(v._data) } if lazy_data: import dask diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index c58c29db1f2..4672c21be18 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -14,6 +14,7 @@ from . import dask_array_compat, dask_array_ops, dtypes, npcompat, nputils from .nputils import nanfirst, nanlast +from .pycompat import dask_array_type from .dask_array_compat import is_duck_dask_array try: @@ -104,7 +105,7 @@ def isnull(data): return zeros_like(data, dtype=bool) else: # at this point, array should have dtype=object - if is_duck_dask_array(data) or isinstance(data, np.ndarray): + if isinstance(data, (np.ndarray, dask_array_type)): return pandas_isnull(data) else: # Not reachable yet, but intended for use with other duck array @@ -190,11 +191,7 @@ def lazy_array_equiv(arr1, arr2): arr2 = asarray(arr2) if arr1.shape != arr2.shape: return False - if ( - dask_array - and is_duck_dask_array(arr1) - and is_duck_dask_array(arr2) - ): + if dask_array and is_duck_dask_array(arr1) and is_duck_dask_array(arr2): # GH3068 if arr1.name == arr2.name: return True diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 2ff9d8880fb..8494d2ae690 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -456,7 +456,9 @@ def short_data_repr(array): internal_data = getattr(array, "variable", array)._data if isinstance(array, np.ndarray): return short_numpy_repr(array) - elif hasattr(internal_data, "__array_function__") or is_duck_dask_array(internal_data): + elif hasattr(internal_data, "__array_function__") or is_duck_dask_array( + internal_data + ): return limit_lines(repr(array.data), limit=40) elif array._in_memory or array.size < 1e5: return short_numpy_repr(array) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 75f7ed61d83..76a127d694c 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -11,7 +11,7 @@ from . import duck_array_ops, nputils, utils from .npcompat import DTypeLike -from .pycompat import integer_types, sparse_array_type +from .pycompat import integer_types, sparse_array_type, dask_array_type from .dask_array_compat import is_duck_dask_array from .utils import is_dict_like, maybe_cast_to_coords_dtype @@ -700,7 +700,7 @@ def as_indexable(array): return NumpyIndexingAdapter(array) if isinstance(array, pd.Index): return PandasIndexAdapter(array) - if is_duck_dask_array(array): + if isinstance(array, dask_array_type): return DaskIndexingAdapter(array) if hasattr(array, "__array_function__"): return NdArrayLikeIndexingAdapter(array) diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 1af77e815c9..eb3657d9739 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -1,8 +1,8 @@ import numpy as np from . import dtypes, nputils, utils +from .pycompat import dask_array_type from .duck_array_ops import _dask_or_eager_func, count, fillna, isnull, where_method -from .dask_array_compat import is_duck_dask_array try: import dask.array as dask_array @@ -78,7 +78,7 @@ def nanmin(a, axis=None, out=None): if a.dtype.kind == "O": return _nan_minmax_object("min", dtypes.get_pos_infinity(a.dtype), a, axis) - module = dask_array if is_duck_dask_array(a) else nputils + module = dask_array if isinstance(a, dask_array_type) else nputils return module.nanmin(a, axis=axis) @@ -86,7 +86,7 @@ def nanmax(a, axis=None, out=None): if a.dtype.kind == "O": return _nan_minmax_object("max", dtypes.get_neg_infinity(a.dtype), a, axis) - module = dask_array if is_duck_dask_array(a) else nputils + module = dask_array if isinstance(a, dask_array_type) else nputils return module.nanmax(a, axis=axis) @@ -95,7 +95,7 @@ def nanargmin(a, axis=None): fill_value = dtypes.get_pos_infinity(a.dtype) return _nan_argminmax_object("argmin", fill_value, a, axis=axis) - module = dask_array if is_duck_dask_array(a) else nputils + module = dask_array if isinstance(a, dask_array_type) else nputils return module.nanargmin(a, axis=axis) @@ -104,7 +104,7 @@ def nanargmax(a, axis=None): fill_value = dtypes.get_neg_infinity(a.dtype) return _nan_argminmax_object("argmax", fill_value, a, axis=axis) - module = dask_array if is_duck_dask_array(a) else nputils + module = dask_array if isinstance(a, dask_array_type) else nputils return module.nanargmax(a, axis=axis) @@ -137,7 +137,7 @@ def nanmean(a, axis=None, dtype=None, out=None): if a.dtype.kind == "O": return _nanmean_ddof_object(0, a, axis=axis, dtype=dtype) - if is_duck_dask_array(a): + if isinstance(a, dask_array_type): return dask_array.nanmean(a, axis=axis, dtype=dtype) return np.nanmean(a, axis=axis, dtype=dtype) From 8ca52f62c06dd4f97e925dbc7f3b9fdb5a19350b Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Mon, 27 Jul 2020 15:25:20 -0500 Subject: [PATCH 05/17] Move is_duck_dask_array to pycompat.py and use tokenize for comparisons --- xarray/backends/common.py | 2 +- xarray/coding/strings.py | 2 +- xarray/coding/variables.py | 2 +- xarray/conventions.py | 2 +- xarray/convert.py | 2 +- xarray/core/accessor_dt.py | 2 +- xarray/core/common.py | 2 +- xarray/core/computation.py | 2 +- xarray/core/dask_array_compat.py | 8 +------ xarray/core/dataset.py | 2 +- xarray/core/duck_array_ops.py | 9 ++++---- xarray/core/formatting.py | 3 +-- xarray/core/indexing.py | 8 +++++-- xarray/core/missing.py | 2 +- xarray/core/nanops.py | 2 +- xarray/core/pycompat.py | 12 +++++++++- xarray/core/rolling.py | 2 +- xarray/core/rolling_exp.py | 2 +- xarray/core/variable.py | 7 +++++- xarray/tests/test_units.py | 39 +++++++++++++++++++++++++++++++- 20 files changed, 81 insertions(+), 31 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index b5b615c988d..76ea83965b8 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -8,7 +8,7 @@ from ..conventions import cf_encoder from ..core import indexing -from ..core.dask_array_compat import is_duck_dask_array +from ..core.pycompat import is_duck_dask_array from ..core.utils import FrozenDict, NdimSizeLenMixin # Create a logger object, but don't add any handlers. Leave that to user code. diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 706536d4188..ce8966fca59 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -4,7 +4,7 @@ import numpy as np from ..core import indexing -from ..core.dask_array_compat import is_duck_dask_array +from ..core.pycompat import is_duck_dask_array from ..core.variable import Variable from .variables import ( VariableCoder, diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 06f7be26a45..45befe55da5 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -7,7 +7,7 @@ import pandas as pd from ..core import dtypes, duck_array_ops, indexing -from ..core.dask_array_compat import is_duck_dask_array +from ..core.pycompat import is_duck_dask_array from ..core.variable import Variable diff --git a/xarray/conventions.py b/xarray/conventions.py index 0c9220056ed..7e46741f3c9 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -8,7 +8,7 @@ from .coding.variables import SerializationWarning, pop_to from .core import duck_array_ops, indexing from .core.common import contains_cftime_datetimes -from .core.dask_array_compat import is_duck_dask_array +from .core.pycompat import is_duck_dask_array from .core.variable import IndexVariable, Variable, as_variable diff --git a/xarray/convert.py b/xarray/convert.py index 727837fed40..4daec3b31d2 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -255,7 +255,7 @@ def from_iris(cube): """ import iris.exceptions - from xarray.core.dask_array_compat import is_duck_dask_array + from xarray.core.pycompat import is_duck_dask_array name = _name(cube) if name == "unknown": diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py index 802fd8dfa3b..e5a6c543968 100644 --- a/xarray/core/accessor_dt.py +++ b/xarray/core/accessor_dt.py @@ -6,7 +6,7 @@ is_np_datetime_like, is_np_timedelta_like, ) -from .dask_array_compat import is_duck_dask_array +from .pycompat import is_duck_dask_array def _season_from_months(months): diff --git a/xarray/core/common.py b/xarray/core/common.py index 535c2ece8ae..d4cd8758539 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -23,7 +23,7 @@ from .arithmetic import SupportsArithmetic from .npcompat import DTypeLike from .options import OPTIONS, _get_keep_attrs -from .dask_array_compat import is_duck_dask_array +from .pycompat import is_duck_dask_array from .rolling_exp import RollingExp from .utils import Frozen, either_dict_or_kwargs, is_scalar diff --git a/xarray/core/computation.py b/xarray/core/computation.py index b9e9d33a0bc..5b0f991d2c9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -27,7 +27,7 @@ from .alignment import align, deep_align from .merge import merge_coordinates_without_align from .options import OPTIONS -from .dask_array_compat import is_duck_dask_array +from .pycompat import is_duck_dask_array from .utils import is_dict_like from .variable import Variable diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 587e608a07a..821c8ec78a8 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -4,16 +4,14 @@ import numpy as np -from .utils import is_array_like +from .pycompat import is_duck_dask_array try: import dask.array as da from dask import __version__ as dask_version - from dask.base import is_dask_collection except ImportError: dask_version = "0.0.0" da = None - is_dask_collection = lambda _: False if LooseVersion(dask_version) >= LooseVersion("2.0.0"): meta_from_array = da.utils.meta_from_array @@ -132,10 +130,6 @@ def _validate_pad_output_shape(input_shape, pad_width, output_shape): ) -def is_duck_dask_array(x): - return is_array_like(x) and is_dask_collection(x) - - def pad(array, pad_width, mode="constant", **kwargs): padded = da.pad(array, pad_width, mode=mode, **kwargs) # workaround for inconsistency between numpy and dask: https://github.com/dask/dask/issues/5303 diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b0f915e2f8a..2b7f2ba1b6f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -80,7 +80,7 @@ ) from .missing import get_clean_interp_index from .options import OPTIONS, _get_keep_attrs -from .dask_array_compat import is_duck_dask_array +from .pycompat import is_duck_dask_array from .utils import ( Default, Frozen, diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index d53558d5752..72077a52220 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -17,6 +17,7 @@ try: import dask.array as dask_array + from dask.base import tokenize except ImportError: dask_array = None # type: ignore @@ -176,10 +177,10 @@ def as_shared_dtype(scalars_or_arrays): def lazy_array_equiv(arr1, arr2): """Like array_equal, but doesn't actually compare values. - Returns True when arr1, arr2 identical or their dask names are equal. + Returns True when arr1, arr2 identical or their dask tokens are equal. Returns False when shapes are not equal. Returns None when equality cannot determined: one or both of arr1, arr2 are numpy arrays; - or their dask names are not equal + or their dask tokens are not equal """ if arr1 is arr2: return True @@ -188,8 +189,8 @@ def lazy_array_equiv(arr1, arr2): if arr1.shape != arr2.shape: return False if dask_array and is_duck_dask_array(arr1) and is_duck_dask_array(arr2): - # GH3068 - if arr1.name == arr2.name: + # GH3068, GH4221 + if tokenize(arr1) == tokenize(arr2): return True else: return None diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 8494d2ae690..91365a00f97 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -12,8 +12,7 @@ from .duck_array_ops import array_equiv from .options import OPTIONS -from .pycompat import dask_array_type, sparse_array_type -from .dask_array_compat import is_duck_dask_array +from .pycompat import dask_array_type, sparse_array_type, is_duck_dask_array def pretty_print(x, numchars: int): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 76a127d694c..df40dfaa24f 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -11,8 +11,12 @@ from . import duck_array_ops, nputils, utils from .npcompat import DTypeLike -from .pycompat import integer_types, sparse_array_type, dask_array_type -from .dask_array_compat import is_duck_dask_array +from .pycompat import ( + integer_types, + sparse_array_type, + dask_array_type, + is_duck_dask_array, +) from .utils import is_dict_like, maybe_cast_to_coords_dtype diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 7c487e0ace6..46e45ef8f5b 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -11,7 +11,7 @@ from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import datetime_to_numeric, timedelta_to_numeric -from .dask_array_compat import is_duck_dask_array +from .pycompat import is_duck_dask_array from .options import _get_keep_attrs from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index eb3657d9739..41c8d258d7a 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -1,8 +1,8 @@ import numpy as np from . import dtypes, nputils, utils -from .pycompat import dask_array_type from .duck_array_ops import _dask_or_eager_func, count, fillna, isnull, where_method +from .pycompat import dask_array_type try: import dask.array as dask_array diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index dcb78d17cf8..d89216cd7cd 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -1,14 +1,24 @@ import numpy as np +from .utils import is_array_like + integer_types = (int, np.integer) try: - # solely for isinstance checks import dask.array + from dask.base import is_dask_collection + # solely for isinstance checks dask_array_type = (dask.array.Array,) + + def is_duck_dask_array(x): + return is_array_like(x) and is_dask_collection(x) + + except ImportError: # pragma: no cover dask_array_type = () + is_duck_dask_array = lambda _: False + is_dask_collection = lambda _: False try: # solely for isinstance checks diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 009bc19c664..2530c40b330 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -8,7 +8,7 @@ from .dask_array_ops import dask_rolling_wrapper from .ops import inject_reduce_methods from .options import _get_keep_attrs -from .dask_array_compat import is_duck_dask_array +from .pycompat import is_duck_dask_array try: import bottleneck diff --git a/xarray/core/rolling_exp.py b/xarray/core/rolling_exp.py index 49fcce81126..20b6e2a507f 100644 --- a/xarray/core/rolling_exp.py +++ b/xarray/core/rolling_exp.py @@ -1,7 +1,7 @@ import numpy as np from .pdcompat import count_not_none -from .dask_array_compat import is_duck_dask_array +from .pycompat import is_duck_dask_array def _get_alpha(com=None, span=None, halflife=None, alpha=None): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1b97c923d46..8c98b6823c7 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -33,7 +33,12 @@ ) from .npcompat import IS_NEP18_ACTIVE from .options import _get_keep_attrs -from .pycompat import cupy_array_type, dask_array_type, integer_types, is_duck_dask_array +from .pycompat import ( + cupy_array_type, + dask_array_type, + integer_types, + is_duck_dask_array, +) from .utils import ( OrderedSet, _default, diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 619fa10116d..67b1fd9d9d8 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -9,7 +9,13 @@ from xarray.core import dtypes from xarray.core.npcompat import IS_NEP18_ACTIVE -from . import assert_allclose, assert_duckarray_allclose, assert_equal, assert_identical +from . import ( + assert_allclose, + assert_duckarray_allclose, + assert_equal, + assert_identical, + requires_dask, +) from .test_variable import _PAD_XR_NP_ARGS pint = pytest.importorskip("pint") @@ -2231,6 +2237,21 @@ def test_pad_unit_constant_value(self, unit, error, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) + @requires_dask + @pytest.mark.xfail + def test_tokenize_duck_dask_array(self, dtype): + import dask + + array = dask.array.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) + q = unit_registry.Quantity(array, unit_registry.m) + variable = xr.Variable(("x", "y"), q) + + token = dask.base.tokenize(variable) + post_op = variable + 5 * unit_registry.m + + assert dask.base.tokenize(variable) != dask.base.tokenize(post_op) + assert dask.base.tokenize(variable) == token + class TestDataArray: @pytest.mark.parametrize( @@ -3842,6 +3863,22 @@ def test_grouped_operations(self, func, variant, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) + @requires_dask + @pytest.mark.xfail + def test_tokenize_duck_dask_array(self, dtype): + import dask + + array = dask.array.linspace(0, 5, 10).astype(dtype) + time = pd.date_range("10-09-2010", periods=len(array), freq="1y") + q = unit_registry.Quantity(array, unit_registry.m) + data_array = xr.DataArray(data=q, coords={"time": time}, dims="time") + + token = dask.base.tokenize(data_array) + post_op = data_array + 5 * unit_registry.m + + assert dask.base.tokenize(data_array) != dask.base.tokenize(post_op) + assert dask.base.tokenize(data_array) == token + class TestDataset: @pytest.mark.parametrize( From e405c5700f27d1a418abe6dac93fe1b6d060a462 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Mon, 27 Jul 2020 15:35:59 -0500 Subject: [PATCH 06/17] isort --- xarray/core/formatting.py | 2 +- xarray/core/indexing.py | 4 ++-- xarray/core/missing.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 91365a00f97..1e657fcb486 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -12,7 +12,7 @@ from .duck_array_ops import array_equiv from .options import OPTIONS -from .pycompat import dask_array_type, sparse_array_type, is_duck_dask_array +from .pycompat import dask_array_type, is_duck_dask_array, sparse_array_type def pretty_print(x, numchars: int): diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index df40dfaa24f..abaa9d0dfb1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -12,10 +12,10 @@ from . import duck_array_ops, nputils, utils from .npcompat import DTypeLike from .pycompat import ( - integer_types, - sparse_array_type, dask_array_type, + integer_types, is_duck_dask_array, + sparse_array_type, ) from .utils import is_dict_like, maybe_cast_to_coords_dtype diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 46e45ef8f5b..848783b0e1f 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -11,8 +11,8 @@ from .common import _contains_datetime_like_objects, ones_like from .computation import apply_ufunc from .duck_array_ops import datetime_to_numeric, timedelta_to_numeric -from .pycompat import is_duck_dask_array from .options import _get_keep_attrs +from .pycompat import is_duck_dask_array from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables From dcffe078fe5d3972e6a6ac7186d0c44dd2049931 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Thu, 20 Aug 2020 22:04:29 -0500 Subject: [PATCH 07/17] Implement `is_duck_array` to replace `is_array_like` * Rename `is_array_like` to `is_duck_array` * `is_duck_array` checks for `__array_function__` and `__array_ufunc__` in addition to previous checks * Replace checks for `is_duck_dask_array` and `__array_function__` with `is_duck_array` --- xarray/core/duck_array_ops.py | 7 ++----- xarray/core/formatting.py | 4 ++-- xarray/core/pycompat.py | 4 ++-- xarray/core/utils.py | 8 ++++++-- xarray/core/variable.py | 5 +++-- xarray/testing.py | 8 ++++---- 6 files changed, 19 insertions(+), 17 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 7cd796c3bb4..1fe5b57dcb9 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -21,6 +21,7 @@ sparse_array_type, is_duck_dask_array, ) +from .utils import is_duck_array try: import dask.array as dask_array @@ -180,11 +181,7 @@ def astype(data, **kwargs): def asarray(data, xp=np): - return ( - data - if (is_duck_dask_array(data) or hasattr(data, "__array_function__")) - else xp.asarray(data) - ) + return data if is_duck_array(data) else xp.asarray(data) def as_shared_dtype(scalars_or_arrays): diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index c2e5fb73b7f..6870daaa000 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -527,7 +527,7 @@ def diff_dim_summary(a, b): def _diff_mapping_repr(a_mapping, b_mapping, compat, title, summarizer, col_width=None): - def is_array_like(value): + def is_duck_array(value): return ( hasattr(value, "ndim") and hasattr(value, "shape") @@ -559,7 +559,7 @@ def extra_items_repr(extra_keys, mapping, ab_side): is_variable = True except AttributeError: # compare attribute value - if is_array_like(a_mapping[k]) or is_array_like(b_mapping[k]): + if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]): compatible = array_equiv(a_mapping[k], b_mapping[k]) else: compatible = a_mapping[k] == b_mapping[k] diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index d89216cd7cd..8d613038957 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -1,6 +1,6 @@ import numpy as np -from .utils import is_array_like +from .utils import is_duck_array integer_types = (int, np.integer) @@ -12,7 +12,7 @@ dask_array_type = (dask.array.Array,) def is_duck_dask_array(x): - return is_array_like(x) and is_dask_collection(x) + return is_duck_array(x) and is_dask_collection(x) except ImportError: # pragma: no cover diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ac060215848..4197105d600 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -247,9 +247,13 @@ def is_list_like(value: Any) -> bool: return isinstance(value, list) or isinstance(value, tuple) -def is_array_like(value: Any) -> bool: +def is_duck_array(value: Any) -> bool: return ( - hasattr(value, "ndim") and hasattr(value, "shape") and hasattr(value, "dtype") + hasattr(value, "ndim") + and hasattr(value, "shape") + and hasattr(value, "dtype") + and hasattr(value, "__array_function__") + and hasattr(value, "__array_ufunc__") ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index fce1d55dcb5..375940914cc 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -47,6 +47,7 @@ either_dict_or_kwargs, ensure_us_time_resolution, infix_dims, + is_duck_array, ) NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -348,7 +349,7 @@ def _in_memory(self): @property def data(self): - if hasattr(self._data, "__array_function__") or is_duck_dask_array(self._data): + if is_duck_array(self._data): return self._data else: return self.values @@ -936,7 +937,7 @@ def copy(self, deep=True, data=None): data = indexing.MemoryCachedArray(data.array) if deep: - if hasattr(data, "__array_function__") or is_duck_dask_array(data): + if is_duck_array(data): data = data.copy() elif not isinstance(data, PandasIndexAdapter): # pandas.Index is immutable diff --git a/xarray/testing.py b/xarray/testing.py index ec479ef09d4..c3593d585e1 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -196,14 +196,14 @@ def assert_duckarray_equal(x, y, err_msg="", verbose=True): """ Like `np.testing.assert_array_equal`, but for duckarrays """ __tracebackhide__ = True - if not utils.is_array_like(x) and not utils.is_scalar(x): + if not utils.is_duck_array(x) and not utils.is_scalar(x): x = np.asarray(x) - if not utils.is_array_like(y) and not utils.is_scalar(y): + if not utils.is_duck_array(y) and not utils.is_scalar(y): y = np.asarray(y) - if (utils.is_array_like(x) and utils.is_scalar(y)) or ( - utils.is_scalar(x) and utils.is_array_like(y) + if (utils.is_duck_array(x) and utils.is_scalar(y)) or ( + utils.is_scalar(x) and utils.is_duck_array(y) ): equiv = (x == y).all() else: From aacef5019f2bf2e35e1bd996269c826bd9d5f19a Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Fri, 21 Aug 2020 14:19:37 -0500 Subject: [PATCH 08/17] Skip numpy duck array tests when NEP18 is not active --- xarray/core/duck_array_ops.py | 2 +- xarray/tests/test_testing.py | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 1fe5b57dcb9..5e932c7a57a 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -18,8 +18,8 @@ from .pycompat import ( cupy_array_type, dask_array_type, - sparse_array_type, is_duck_dask_array, + sparse_array_type, ) from .utils import is_duck_array diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_testing.py index adc29a3cc92..0f2ae8b31d4 100644 --- a/xarray/tests/test_testing.py +++ b/xarray/tests/test_testing.py @@ -2,6 +2,7 @@ import pytest import xarray as xr +from xarray.core.npcompat import IS_NEP18_ACTIVE from . import has_dask @@ -98,7 +99,14 @@ def test_assert_duckarray_equal_failing(duckarray, obj1, obj2): @pytest.mark.parametrize( "duckarray", ( - pytest.param(np.array, id="numpy"), + pytest.param( + np.array, + id="numpy", + marks=pytest.mark.skipif( + not IS_NEP18_ACTIVE, + reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled", + ), + ), pytest.param( dask_from_array, id="dask", From fd1db22ccfb5dd5ff688471aacdb6a0841345b81 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Wed, 26 Aug 2020 17:25:06 -0500 Subject: [PATCH 09/17] Use utils.is_duck_array in xarray/core/formatting.py * Replace locally defined `is_duck_array` in _diff_mapping_repr * Replace `"__array_function__"` and `is_duck_dask_array` check in `short_data_repr` --- xarray/core/formatting.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 3a8432f7799..3ed8c6dc241 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -12,7 +12,8 @@ from .duck_array_ops import array_equiv from .options import OPTIONS -from .pycompat import dask_array_type, is_duck_dask_array, sparse_array_type +from .pycompat import dask_array_type, sparse_array_type +from .utils import is_duck_array def pretty_print(x, numchars: int): @@ -457,9 +458,7 @@ def short_data_repr(array): internal_data = getattr(array, "variable", array)._data if isinstance(array, np.ndarray): return short_numpy_repr(array) - elif hasattr(internal_data, "__array_function__") or is_duck_dask_array( - internal_data - ): + elif is_duck_array(internal_data): return limit_lines(repr(array.data), limit=40) elif array._in_memory or array.size < 1e5: return short_numpy_repr(array) @@ -527,13 +526,6 @@ def diff_dim_summary(a, b): def _diff_mapping_repr(a_mapping, b_mapping, compat, title, summarizer, col_width=None): - def is_duck_array(value): - return ( - hasattr(value, "ndim") - and hasattr(value, "shape") - and hasattr(value, "dtype") - ) - def extra_items_repr(extra_keys, mapping, ab_side): extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys] if extra_repr: From 3d81fcdfbe322d1e929d913a42343e9f41dbe88e Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Wed, 26 Aug 2020 17:26:21 -0500 Subject: [PATCH 10/17] Revert back to isinstance check for iris cube --- xarray/convert.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 4daec3b31d2..fb731ce11e8 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -10,6 +10,7 @@ from .core import duck_array_ops from .core.dataarray import DataArray from .core.dtypes import get_fill_value +from .core.pycompat import dask_array_type cdms2_ignored_attrs = {"name", "tileIndex"} iris_forbidden_keys = { @@ -255,8 +256,6 @@ def from_iris(cube): """ import iris.exceptions - from xarray.core.pycompat import is_duck_dask_array - name = _name(cube) if name == "unknown": name = None @@ -291,7 +290,7 @@ def from_iris(cube): cube_data = cube.core_data() if hasattr(cube, "core_data") else cube.data # Deal with dask and numpy masked arrays - if is_duck_dask_array(cube_data): + if isinstance(cube_data, dask_array_type): from dask.array import ma as dask_ma filled_data = dask_ma.filled(cube_data, get_fill_value(cube.dtype)) From f2cbf1e16bb8c5a6ba32b9c8e881687751047b5f Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Thu, 27 Aug 2020 19:24:51 -0500 Subject: [PATCH 11/17] Add is_duck_array_or_ndarray function to utils --- xarray/core/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index c7e94c35e8b..e659c0db44a 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -31,6 +31,8 @@ import numpy as np import pandas as pd +from .npcompat import IS_NEP18_ACTIVE + K = TypeVar("K") V = TypeVar("V") T = TypeVar("T") @@ -257,6 +259,12 @@ def is_duck_array(value: Any) -> bool: ) +def is_duck_array_or_ndarray(value: Any) -> bool: + return is_duck_array(value) or ( + not IS_NEP18_ACTIVE and isinstance(value, np.ndarray) + ) + + def either_dict_or_kwargs( pos_kwargs: Optional[Mapping[Hashable, T]], kw_kwargs: Mapping[str, T], From d6189a9d0702669ceb165a2c7a779589880a3d9d Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Thu, 27 Aug 2020 19:27:27 -0500 Subject: [PATCH 12/17] Use is_duck_array_or_ndarray for duck array checks without NEP18 --- xarray/core/formatting.py | 6 ++++-- xarray/core/variable.py | 6 ++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 3ed8c6dc241..8a7f84aa95b 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -13,7 +13,7 @@ from .duck_array_ops import array_equiv from .options import OPTIONS from .pycompat import dask_array_type, sparse_array_type -from .utils import is_duck_array +from .utils import is_duck_array, is_duck_array_or_ndarray def pretty_print(x, numchars: int): @@ -551,7 +551,9 @@ def extra_items_repr(extra_keys, mapping, ab_side): is_variable = True except AttributeError: # compare attribute value - if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]): + if is_duck_array_or_ndarray(a_mapping[k]) or is_duck_array_or_ndarray( + b_mapping[k] + ): compatible = array_equiv(a_mapping[k], b_mapping[k]) else: compatible = a_mapping[k] == b_mapping[k] diff --git a/xarray/core/variable.py b/xarray/core/variable.py index aa1a9b6d156..b330b4e9ad5 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -48,6 +48,7 @@ ensure_us_time_resolution, infix_dims, is_duck_array, + is_duck_array_or_ndarray, ) NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -936,10 +937,7 @@ def copy(self, deep=True, data=None): # don't share caching between copies data = indexing.MemoryCachedArray(data.array) - if deep and ( - is_duck_array(data) - or (not IS_NEP18_ACTIVE and isinstance(data, np.ndarray)) - ): + if deep and is_duck_array_or_ndarray(data): data = copy.deepcopy(data) else: From 70fc50c61022f310bf03f927291fbe90465f7166 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Thu, 27 Aug 2020 22:30:34 -0500 Subject: [PATCH 13/17] Remove is_duck_dask_array_or_ndarray, replace checks with is_duck_array * Add explicit check for NumPy array to is_duck_array * Replace is_duck_array_or_ndarray checks with is_duck_array --- xarray/core/formatting.py | 6 ++---- xarray/core/utils.py | 10 ++-------- xarray/core/variable.py | 3 +-- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 8a7f84aa95b..3ed8c6dc241 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -13,7 +13,7 @@ from .duck_array_ops import array_equiv from .options import OPTIONS from .pycompat import dask_array_type, sparse_array_type -from .utils import is_duck_array, is_duck_array_or_ndarray +from .utils import is_duck_array def pretty_print(x, numchars: int): @@ -551,9 +551,7 @@ def extra_items_repr(extra_keys, mapping, ab_side): is_variable = True except AttributeError: # compare attribute value - if is_duck_array_or_ndarray(a_mapping[k]) or is_duck_array_or_ndarray( - b_mapping[k] - ): + if is_duck_array(a_mapping[k]) or is_duck_array(b_mapping[k]): compatible = array_equiv(a_mapping[k], b_mapping[k]) else: compatible = a_mapping[k] == b_mapping[k] diff --git a/xarray/core/utils.py b/xarray/core/utils.py index e659c0db44a..cfb627f7af5 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -31,8 +31,6 @@ import numpy as np import pandas as pd -from .npcompat import IS_NEP18_ACTIVE - K = TypeVar("K") V = TypeVar("V") T = TypeVar("T") @@ -250,6 +248,8 @@ def is_list_like(value: Any) -> bool: def is_duck_array(value: Any) -> bool: + if isinstance(value, np.ndarray): + return True return ( hasattr(value, "ndim") and hasattr(value, "shape") @@ -259,12 +259,6 @@ def is_duck_array(value: Any) -> bool: ) -def is_duck_array_or_ndarray(value: Any) -> bool: - return is_duck_array(value) or ( - not IS_NEP18_ACTIVE and isinstance(value, np.ndarray) - ) - - def either_dict_or_kwargs( pos_kwargs: Optional[Mapping[Hashable, T]], kw_kwargs: Mapping[str, T], diff --git a/xarray/core/variable.py b/xarray/core/variable.py index b330b4e9ad5..45f3004c348 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -48,7 +48,6 @@ ensure_us_time_resolution, infix_dims, is_duck_array, - is_duck_array_or_ndarray, ) NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -937,7 +936,7 @@ def copy(self, deep=True, data=None): # don't share caching between copies data = indexing.MemoryCachedArray(data.array) - if deep and is_duck_array_or_ndarray(data): + if deep and is_duck_array(data): data = copy.deepcopy(data) else: From 91c283a7f724e106bd3383a50573bfd7f5924bb8 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Sat, 29 Aug 2020 13:20:21 -0500 Subject: [PATCH 14/17] Remove is_duck_array check for deep copy Co-authored-by: keewis --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 45f3004c348..0042761503f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -936,7 +936,7 @@ def copy(self, deep=True, data=None): # don't share caching between copies data = indexing.MemoryCachedArray(data.array) - if deep and is_duck_array(data): + if deep: data = copy.deepcopy(data) else: From a51eb66e73af40dfc0a2529139b3dca5da0fc2f1 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Mon, 31 Aug 2020 10:57:20 -0500 Subject: [PATCH 15/17] Use is_duck_array check in load --- xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 917d0bbb783..a12e8560d03 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -433,7 +433,7 @@ def load(self, **kwargs): """ if is_duck_dask_array(self._data): self._data = as_compatible_data(self._data.compute(**kwargs)) - elif not hasattr(self._data, "__array_function__"): + elif not is_duck_array(self._data): self._data = np.asarray(self._data) return self From 256d43d747675164a440dbe263132f2233987709 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Mon, 31 Aug 2020 11:07:50 -0500 Subject: [PATCH 16/17] Move duck dask array tokenize tests from test_units.py to test_dask.py --- xarray/tests/__init__.py | 1 + xarray/tests/test_dask.py | 43 ++++++++++++++++++++++++++++++++++++++ xarray/tests/test_units.py | 39 +--------------------------------- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 6ad30007f9f..47130fbbb67 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -78,6 +78,7 @@ def LooseVersion(vstring): has_seaborn, requires_seaborn = _importorskip("seaborn") has_sparse, requires_sparse = _importorskip("sparse") has_cartopy, requires_cartopy = _importorskip("cartopy") +has_pint, requires_pint = _importorskip("pint") # some special cases has_scipy_or_netCDF4 = has_scipy or has_netCDF4 diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 358ea731b90..dc37b80fb36 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -24,6 +24,7 @@ assert_frame_equal, assert_identical, raises_regex, + requires_pint, requires_scipy_or_netCDF4, ) from .test_backends import create_tmp_file @@ -292,6 +293,26 @@ def test_persist(self): self.assertLazyAndAllClose(u + 1, v) self.assertLazyAndAllClose(u + 1, v2) + @requires_pint + def test_tokenize_duck_dask_array(self): + import pint + + pytest.mark.skipif( + pint.__version__ < "0.15", + reason="Dask tokenize is not implemented for Pint < 0.15", + ) + unit_registry = pint.UnitRegistry() + + q = unit_registry.Quantity(self.data, "meter") + variable = xr.Variable(("x", "y"), q) + + token = dask.base.tokenize(variable) + post_op = variable + 5 * unit_registry.meter + + assert dask.base.tokenize(variable) != dask.base.tokenize(post_op) + # Immutability check + assert dask.base.tokenize(variable) == token + class TestDataArrayAndDataset(DaskTestCase): def assertLazyAndIdentical(self, expected, actual): @@ -715,6 +736,28 @@ def test_from_dask_variable(self): a = DataArray(self.lazy_array.variable, coords={"x": range(4)}, name="foo") self.assertLazyAndIdentical(self.lazy_array, a) + @requires_pint + def test_tokenize_duck_dask_array(self): + import pint + + pytest.mark.skipif( + pint.__version__ < "0.15", + reason="Dask tokenize is not implemented for Pint < 0.15", + ) + unit_registry = pint.UnitRegistry() + + q = unit_registry.Quantity(self.data, unit_registry.meter) + data_array = xr.DataArray( + data=q, coords={"x": range(4)}, dims=("x", "y"), name="foo" + ) + + token = dask.base.tokenize(data_array) + post_op = data_array + 5 * unit_registry.meter + + assert dask.base.tokenize(data_array) != dask.base.tokenize(post_op) + # Immutability check + assert dask.base.tokenize(data_array) == token + class TestToDaskDataFrame: def test_to_dask_dataframe(self): diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 5d276791c64..525c1e8fc33 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -9,13 +9,7 @@ from xarray.core import dtypes from xarray.core.npcompat import IS_NEP18_ACTIVE -from . import ( - assert_allclose, - assert_duckarray_allclose, - assert_equal, - assert_identical, - requires_dask, -) +from . import assert_allclose, assert_duckarray_allclose, assert_equal, assert_identical from .test_variable import _PAD_XR_NP_ARGS pint = pytest.importorskip("pint") @@ -2243,21 +2237,6 @@ def test_pad_unit_constant_value(self, unit, error, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) - @requires_dask - @pytest.mark.xfail - def test_tokenize_duck_dask_array(self, dtype): - import dask - - array = dask.array.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) - q = unit_registry.Quantity(array, unit_registry.m) - variable = xr.Variable(("x", "y"), q) - - token = dask.base.tokenize(variable) - post_op = variable + 5 * unit_registry.m - - assert dask.base.tokenize(variable) != dask.base.tokenize(post_op) - assert dask.base.tokenize(variable) == token - class TestDataArray: @pytest.mark.parametrize( @@ -3881,22 +3860,6 @@ def test_grouped_operations(self, func, variant, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) - @requires_dask - @pytest.mark.xfail - def test_tokenize_duck_dask_array(self, dtype): - import dask - - array = dask.array.linspace(0, 5, 10).astype(dtype) - time = pd.date_range("10-09-2010", periods=len(array), freq="1y") - q = unit_registry.Quantity(array, unit_registry.m) - data_array = xr.DataArray(data=q, coords={"time": time}, dims="time") - - token = dask.base.tokenize(data_array) - post_op = data_array + 5 * unit_registry.m - - assert dask.base.tokenize(data_array) != dask.base.tokenize(post_op) - assert dask.base.tokenize(data_array) == token - class TestDataset: @pytest.mark.parametrize( From 4e7409c94f4261df07b8e7d720dd00f5f7a27901 Mon Sep 17 00:00:00 2001 From: Russell Manser Date: Mon, 31 Aug 2020 13:45:34 -0500 Subject: [PATCH 17/17] Use _importorskip to require pint >=0.15 instead of pytest.mark.skipif --- xarray/tests/__init__.py | 3 ++- xarray/tests/test_dask.py | 14 +++----------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 47130fbbb67..9e1fdc0df33 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -78,7 +78,8 @@ def LooseVersion(vstring): has_seaborn, requires_seaborn = _importorskip("seaborn") has_sparse, requires_sparse = _importorskip("sparse") has_cartopy, requires_cartopy = _importorskip("cartopy") -has_pint, requires_pint = _importorskip("pint") +# Need Pint 0.15 for __dask_tokenize__ tests for Quantity wrapped Dask Arrays +has_pint_0_15, requires_pint_0_15 = _importorskip("pint", minversion="0.15") # some special cases has_scipy_or_netCDF4 = has_scipy or has_netCDF4 diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index dc37b80fb36..46685a29a47 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -24,7 +24,7 @@ assert_frame_equal, assert_identical, raises_regex, - requires_pint, + requires_pint_0_15, requires_scipy_or_netCDF4, ) from .test_backends import create_tmp_file @@ -293,14 +293,10 @@ def test_persist(self): self.assertLazyAndAllClose(u + 1, v) self.assertLazyAndAllClose(u + 1, v2) - @requires_pint + @requires_pint_0_15(reason="Need __dask_tokenize__") def test_tokenize_duck_dask_array(self): import pint - pytest.mark.skipif( - pint.__version__ < "0.15", - reason="Dask tokenize is not implemented for Pint < 0.15", - ) unit_registry = pint.UnitRegistry() q = unit_registry.Quantity(self.data, "meter") @@ -736,14 +732,10 @@ def test_from_dask_variable(self): a = DataArray(self.lazy_array.variable, coords={"x": range(4)}, name="foo") self.assertLazyAndIdentical(self.lazy_array, a) - @requires_pint + @requires_pint_0_15(reason="Need __dask_tokenize__") def test_tokenize_duck_dask_array(self): import pint - pytest.mark.skipif( - pint.__version__ < "0.15", - reason="Dask tokenize is not implemented for Pint < 0.15", - ) unit_registry = pint.UnitRegistry() q = unit_registry.Quantity(self.data, unit_registry.meter)