From 3f587b43660ab52a1ef4cd5828e57478ceb9f34a Mon Sep 17 00:00:00 2001 From: Mick <43316012+headtr1ck@users.noreply.github.com> Date: Fri, 27 May 2022 18:03:10 +0200 Subject: [PATCH 1/6] Improved DataArray typing (#6637) * add .env to gitignore * improved typing for dataarray * even more typing * even further typing * finish typing dataArray * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix some cast type not imported problems * fix another cast import bug * fix some error message regexes * fix import and typo * fix wrong case in intp_dimorder test * type all test_dataarray tests * fix typing in test_dataarray Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .gitignore | 2 + xarray/core/alignment.py | 28 +- xarray/core/dataarray.py | 838 ++++++++++++++++++------------ xarray/core/dataset.py | 98 ++-- xarray/core/types.py | 32 ++ xarray/core/variable.py | 23 +- xarray/tests/__init__.py | 5 +- xarray/tests/test_dataarray.py | 921 +++++++++++++++++---------------- xarray/tests/test_interp.py | 161 +++--- 9 files changed, 1187 insertions(+), 921 deletions(-) diff --git a/.gitignore b/.gitignore index 686c7efa701..293e79fe806 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ *.py[cod] __pycache__ +.env +.venv # example caches from Hypothesis .hypothesis/ diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 7b206fceeeb..df8b3c24a91 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -16,6 +16,7 @@ Tuple, Type, TypeVar, + cast, ) import numpy as np @@ -30,7 +31,7 @@ if TYPE_CHECKING: from .dataarray import DataArray from .dataset import Dataset - from .types import JoinOptions + from .types import JoinOptions, T_DataArray, T_DataArrayOrSet, T_Dataset DataAlignable = TypeVar("DataAlignable", bound=DataWithCoords) @@ -559,7 +560,7 @@ def align(self) -> None: def align( *objects: DataAlignable, join: JoinOptions = "inner", - copy=True, + copy: bool = True, indexes=None, exclude=frozenset(), fill_value=dtypes.NA, @@ -592,7 +593,7 @@ def align( those of the first object with that dimension. Indexes for the same dimension must have the same size in all objects. - copy : bool, optional + copy : bool, default: True If ``copy=True``, data in the return values is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with the input. @@ -609,7 +610,7 @@ def align( Returns ------- - aligned : DataArray or Dataset + aligned : tuple of DataArray or Dataset Tuple of objects with the same type as `*objects` with aligned coordinates. 
@@ -935,7 +936,9 @@ def _get_broadcast_dims_map_common_coords(args, exclude): return dims_map, common_coords -def _broadcast_helper(arg, exclude, dims_map, common_coords): +def _broadcast_helper( + arg: T_DataArrayOrSet, exclude, dims_map, common_coords +) -> T_DataArrayOrSet: from .dataarray import DataArray from .dataset import Dataset @@ -950,22 +953,25 @@ def _set_dims(var): return var.set_dims(var_dims_map) - def _broadcast_array(array): + def _broadcast_array(array: T_DataArray) -> T_DataArray: data = _set_dims(array.variable) coords = dict(array.coords) coords.update(common_coords) - return DataArray(data, coords, data.dims, name=array.name, attrs=array.attrs) + return array.__class__( + data, coords, data.dims, name=array.name, attrs=array.attrs + ) - def _broadcast_dataset(ds): + def _broadcast_dataset(ds: T_Dataset) -> T_Dataset: data_vars = {k: _set_dims(ds.variables[k]) for k in ds.data_vars} coords = dict(ds.coords) coords.update(common_coords) - return Dataset(data_vars, coords, ds.attrs) + return ds.__class__(data_vars, coords, ds.attrs) + # remove casts once https://github.com/python/mypy/issues/12800 is resolved if isinstance(arg, DataArray): - return _broadcast_array(arg) + return cast("T_DataArrayOrSet", _broadcast_array(arg)) elif isinstance(arg, Dataset): - return _broadcast_dataset(arg) + return cast("T_DataArrayOrSet", _broadcast_dataset(arg)) else: raise ValueError("all input must be Dataset or DataArray objects") diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 204a4669b3d..b5bcc255b70 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -11,6 +11,7 @@ Iterable, Literal, Mapping, + NoReturn, Sequence, cast, overload, @@ -66,6 +67,8 @@ from .variable import IndexVariable, Variable, as_compatible_data, as_variable if TYPE_CHECKING: + from typing import TypeVar, Union + try: from dask.delayed import Delayed except ImportError: @@ -80,7 +83,22 @@ iris_Cube = None from ..backends.api import T_NetcdfEngine, T_NetcdfTypes - from .types import ErrorOptions, ErrorOptionsWithWarn, T_DataArray, T_Xarray + from .types import ( + DatetimeUnitOptions, + ErrorOptions, + ErrorOptionsWithWarn, + InterpAllOptions, + InterpOptions, + PadModeOptions, + PadReflectOptions, + QueryEngineOptions, + QueryParserOptions, + ReindexMethodOptions, + T_DataArray, + T_Xarray, + ) + + T_XarrayOther = TypeVar("T_XarrayOther", bound=Union["DataArray", Dataset]) def _infer_coords_and_dims( @@ -262,9 +280,9 @@ class DataArray( - mapping {coord name: (dimension name, array-like)} - mapping {coord name: (tuple of dimension names, array-like)} - dims : hashable or sequence of hashable, optional - Name(s) of the data dimension(s). Must be either a hashable - (only for 1D data) or a sequence of hashables with length equal + dims : Hashable or sequence of Hashable, optional + Name(s) of the data dimension(s). Must be either a Hashable + (only for 1D data) or a sequence of Hashables with length equal to the number of dimensions. If this argument is omitted, dimension names are taken from ``coords`` (if possible) and otherwise default to ``['dim_0', ... 'dim_n']``. 
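Note on the `_broadcast_helper` hunk above: the added `cast` calls work around a current mypy limitation (python/mypy#12800) where `isinstance` narrows the variable but not the union-bound TypeVar itself. A minimal self-contained sketch of the pattern, with `Arr`/`Dset` as hypothetical stand-ins for `DataArray`/`Dataset`:

from typing import TypeVar, Union, cast


class Arr:
    pass


class Dset:
    pass


T_ArrOrSet = TypeVar("T_ArrOrSet", bound=Union[Arr, Dset])


def _process_arr(a: Arr) -> Arr:
    # stand-in for _broadcast_array(), which returns a plain Arr
    return a


def _process_dset(d: Dset) -> Dset:
    # stand-in for _broadcast_dataset()
    return d


def process(arg: T_ArrOrSet) -> T_ArrOrSet:
    if isinstance(arg, Arr):
        # the helper returns a plain Arr, which mypy cannot tie back to
        # the TypeVar, hence the cast back to T_ArrOrSet
        return cast("T_ArrOrSet", _process_arr(arg))
    elif isinstance(arg, Dset):
        return cast("T_ArrOrSet", _process_dset(arg))
    else:
        raise ValueError("all input must be Dataset or DataArray objects")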
@@ -361,14 +379,16 @@ class DataArray( def __init__( self, data: Any = dtypes.NA, - coords: Sequence[tuple] | Mapping[Any, Any] | None = None, + coords: Sequence[Sequence[Any] | pd.Index | DataArray] + | Mapping[Any, Any] + | None = None, dims: Hashable | Sequence[Hashable] | None = None, name: Hashable = None, attrs: Mapping = None, # internal parameters indexes: dict[Hashable, Index] = None, fastpath: bool = False, - ): + ) -> None: if fastpath: variable = data assert dims is None @@ -419,12 +439,12 @@ def __init__( @classmethod def _construct_direct( - cls, + cls: type[T_DataArray], variable: Variable, coords: dict[Any, Variable], name: Hashable, indexes: dict[Hashable, Index], - ) -> DataArray: + ) -> T_DataArray: """Shortcut around __init__ for internal use when we want to skip costly validation """ @@ -454,8 +474,10 @@ def _replace( return type(self)(variable, coords, name=name, indexes=indexes, fastpath=True) def _replace_maybe_drop_dims( - self, variable: Variable, name: Hashable | None | Default = _default - ) -> DataArray: + self: T_DataArray, + variable: Variable, + name: Hashable | None | Default = _default, + ) -> T_DataArray: if variable.dims == self.dims and variable.shape == self.shape: coords = self._coords.copy() indexes = self._indexes @@ -477,12 +499,12 @@ def _replace_maybe_drop_dims( return self._replace(variable, coords, name, indexes=indexes) def _overwrite_indexes( - self, + self: T_DataArray, indexes: Mapping[Any, Index], coords: Mapping[Any, Variable] = None, drop_coords: list[Hashable] = None, rename_dims: Mapping[Any, Any] = None, - ) -> DataArray: + ) -> T_DataArray: """Maybe replace indexes and their corresponding coordinates.""" if not indexes: return self @@ -515,8 +537,8 @@ def _to_temp_dataset(self) -> Dataset: return self._to_dataset_whole(name=_THIS_ARRAY, shallow_copy=False) def _from_temp_dataset( - self, dataset: Dataset, name: Hashable | None | Default = _default - ) -> DataArray: + self: T_DataArray, dataset: Dataset, name: Hashable | None | Default = _default + ) -> T_DataArray: variable = dataset._variables.pop(_THIS_ARRAY) coords = dataset._variables indexes = dataset._indexes @@ -577,11 +599,11 @@ def to_dataset( Parameters ---------- - dim : hashable, optional + dim : Hashable, optional Name of the dimension on this array along which to split this array into separate variables. If not provided, this array is converted into a Dataset of one variable. - name : hashable, optional + name : Hashable, optional Name to substitute for this array's name. Only valid if ``dim`` is not provided. promote_attrs : bool, default: False @@ -726,7 +748,7 @@ def dims(self) -> tuple[Hashable, ...]: return self.variable.dims @dims.setter - def dims(self, value): + def dims(self, value: Any) -> NoReturn: raise AttributeError( "you cannot assign dims on a DataArray. Use " ".rename() or .swap_dims() instead." 
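Most of the signature changes in this file follow one pattern: annotating `self` with the bound TypeVar `T_DataArray` so that methods called on a `DataArray` subclass are typed as returning the subclass rather than the base class. A minimal sketch of the idiom, using hypothetical `Arr`/`SubArr` classes rather than xarray API:

from __future__ import annotations

from typing import TypeVar

T_Arr = TypeVar("T_Arr", bound="Arr")


class Arr:
    def copy(self: T_Arr) -> T_Arr:
        # type(self)() keeps the runtime class; the T_Arr annotation tells
        # the type checker that the static type is kept as well
        return type(self)()


class SubArr(Arr):
    pass


copied = SubArr().copy()
assert isinstance(copied, SubArr)  # mypy likewise infers SubArr, not Arr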
@@ -738,7 +760,7 @@ def _item_key_to_dict(self, key: Any) -> Mapping[Hashable, Any]: key = indexing.expanded_indexer(key, self.ndim) return dict(zip(self.dims, key)) - def _getitem_coord(self, key): + def _getitem_coord(self: T_DataArray, key: Any) -> T_DataArray: from .dataset import _get_virtual_variable try: @@ -749,7 +771,7 @@ def _getitem_coord(self, key): return self._replace_maybe_drop_dims(var, name=key) - def __getitem__(self, key: Any) -> DataArray: + def __getitem__(self: T_DataArray, key: Any) -> T_DataArray: if isinstance(key, str): return self._getitem_coord(key) else: @@ -844,19 +866,36 @@ def coords(self) -> DataArrayCoordinates: """Dictionary-like container of coordinate arrays.""" return DataArrayCoordinates(self) + @overload def reset_coords( - self, - names: Iterable[Hashable] | Hashable | None = None, + self: T_DataArray, + names: Hashable | Iterable[Hashable] | None = None, + drop: Literal[False] = False, + ) -> Dataset: + ... + + @overload + def reset_coords( + self: T_DataArray, + names: Hashable | Iterable[Hashable] | None = None, + *, + drop: Literal[True], + ) -> T_DataArray: + ... + + def reset_coords( + self: T_DataArray, + names: Hashable | Iterable[Hashable] | None = None, drop: bool = False, - ) -> None | DataArray | Dataset: + ) -> T_DataArray | Dataset: """Given names of coordinates, reset them to become variables. Parameters ---------- - names : hashable or iterable of hashable, optional + names : Hashable or iterable of Hashable, optional Name(s) of non-index coordinates in this dataset to reset into variables. By default, all non-index coordinates are reset. - drop : bool, optional + drop : bool, default: False If True, remove coordinates instead of converting them into variables. @@ -907,14 +946,14 @@ def __dask_postpersist__(self): return self._dask_finalize, (self.name, func) + args @staticmethod - def _dask_finalize(results, name, func, *args, **kwargs): + def _dask_finalize(results, name, func, *args, **kwargs) -> DataArray: ds = func(results, *args, **kwargs) variable = ds._variables.pop(_THIS_ARRAY) coords = ds._variables indexes = ds._indexes return DataArray(variable, coords, name=name, indexes=indexes, fastpath=True) - def load(self, **kwargs) -> DataArray: + def load(self: T_DataArray, **kwargs) -> T_DataArray: """Manually trigger loading of this array's data from disk or a remote source into memory and return this array. @@ -938,7 +977,7 @@ def load(self, **kwargs) -> DataArray: self._coords = new._coords return self - def compute(self, **kwargs) -> DataArray: + def compute(self: T_DataArray, **kwargs) -> T_DataArray: """Manually trigger loading of this array's data from disk or a remote source into memory and return a new array. The original is left unaltered. @@ -960,7 +999,7 @@ def compute(self, **kwargs) -> DataArray: new = self.copy(deep=False) return new.load(**kwargs) - def persist(self, **kwargs) -> DataArray: + def persist(self: T_DataArray, **kwargs) -> T_DataArray: """Trigger computation in constituent dask arrays This keeps them as dask arrays but encourages them to keep data in @@ -1001,7 +1040,7 @@ def copy(self: T_DataArray, deep: bool = True, data: Any = None) -> T_DataArray: Returns ------- - object : DataArray + copy : DataArray New object with dimensions, attributes, coordinates, name, encoding, and optionally data copied from original. 
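The two `reset_coords` overloads above let the checker split the return type: `drop=False` yields a `Dataset`, while the keyword-only `drop=True` keeps the array type. A hedged sketch of the same idiom with placeholder classes (a plain `dict` stands in for `Dataset`):

from __future__ import annotations

from typing import Literal, Union, overload


class Arr:
    @overload
    def reset(self, drop: Literal[False] = False) -> dict:
        ...

    @overload
    def reset(self, *, drop: Literal[True]) -> Arr:
        ...

    def reset(self, drop: bool = False) -> Union[Arr, dict]:
        # single runtime implementation; mypy selects the overload from
        # the literal value of `drop` at each call site
        return self if drop else {}

Making the `drop=True` variant keyword-only (the bare `*`) mirrors the diff and keeps the overloads from overlapping on positional calls.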
@@ -1059,15 +1098,15 @@ def copy(self: T_DataArray, deep: bool = True, data: Any = None) -> T_DataArray: return self._replace(variable, coords, indexes=indexes) - def __copy__(self) -> DataArray: + def __copy__(self: T_DataArray) -> T_DataArray: return self.copy(deep=False) - def __deepcopy__(self, memo=None) -> DataArray: + def __deepcopy__(self: T_DataArray, memo=None) -> T_DataArray: # memo does nothing but is required for compatibility with # copy.deepcopy return self.copy(deep=True) - # mutable objects should not be hashable + # mutable objects should not be Hashable # https://github.com/python/mypy/issues/4266 __hash__ = None # type: ignore[assignment] @@ -1105,7 +1144,7 @@ def chunksizes(self) -> Mapping[Any, tuple[int, ...]]: return get_chunksizes(all_variables) def chunk( - self, + self: T_DataArray, chunks: ( int | Literal["auto"] @@ -1114,11 +1153,11 @@ def chunk( | Mapping[Any, None | int | tuple[int, ...]] ) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667) name_prefix: str = "xarray-", - token: str = None, + token: str | None = None, lock: bool = False, inline_array: bool = False, **chunks_kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Coerce this array's data into a dask arrays with the given chunks. If this variable is a non-dask array, it will be converted to dask @@ -1131,7 +1170,7 @@ def chunk( Parameters ---------- - chunks : int, "auto", tuple of int or mapping of hashable to int, optional + chunks : int, "auto", tuple of int or mapping of Hashable to int, optional Chunk sizes along each dimension, e.g., ``5``, ``"auto"``, ``(5, 5)`` or ``{"x": 5, "y": 5}``. name_prefix : str, optional @@ -1185,13 +1224,13 @@ def chunk( return self._from_temp_dataset(ds) def isel( - self, - indexers: Mapping[Any, Any] = None, + self: T_DataArray, + indexers: Mapping[Any, Any] | None = None, drop: bool = False, missing_dims: ErrorOptionsWithWarn = "raise", **indexers_kwargs: Any, - ) -> DataArray: - """Return a new DataArray whose data is given by integer indexing + ) -> T_DataArray: + """Return a new DataArray whose data is given by selecting indexes along the specified dimension(s). Parameters @@ -1215,6 +1254,10 @@ def isel( **indexers_kwargs : {dim: indexer, ...}, optional The keyword arguments form of ``indexers``. + Returns + ------- + indexed : xarray.DataArray + See Also -------- Dataset.isel @@ -1272,13 +1315,13 @@ def isel( return self._replace(variable=variable, coords=coords, indexes=indexes) def sel( - self, + self: T_DataArray, indexers: Mapping[Any, Any] = None, method: str = None, tolerance=None, drop: bool = False, **indexers_kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Return a new DataArray whose data is given by selecting index labels along the specified dimension(s). @@ -1320,10 +1363,11 @@ def sel( method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional Method to use for inexact matches: - * None (default): only exact matches - * pad / ffill: propagate last valid index value forward - * backfill / bfill: propagate next valid index value backward - * nearest: use nearest valid index value + - None (default): only exact matches + - pad / ffill: propagate last valid index value forward + - backfill / bfill: propagate next valid index value backward + - nearest: use nearest valid index value + tolerance : optional Maximum distance between original and new labels for inexact matches. 
The values of the index at the matching locations must @@ -1390,10 +1434,10 @@ def sel( return self._from_temp_dataset(ds) def head( - self, + self: T_DataArray, indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Return a new DataArray whose data is given by the the first `n` values along the specified dimension(s). Default `n` = 5 @@ -1407,10 +1451,10 @@ def head( return self._from_temp_dataset(ds) def tail( - self, + self: T_DataArray, indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Return a new DataArray whose data is given by the the last `n` values along the specified dimension(s). Default `n` = 5 @@ -1424,10 +1468,10 @@ def tail( return self._from_temp_dataset(ds) def thin( - self, + self: T_DataArray, indexers: Mapping[Any, int] | int | None = None, **indexers_kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Return a new DataArray whose data is given by each `n` value along the specified dimension(s). @@ -1441,8 +1485,10 @@ def thin( return self._from_temp_dataset(ds) def broadcast_like( - self, other: DataArray | Dataset, exclude: Iterable[Hashable] | None = None - ) -> DataArray: + self: T_DataArray, + other: DataArray | Dataset, + exclude: Iterable[Hashable] | None = None, + ) -> T_DataArray: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -1460,7 +1506,7 @@ def broadcast_like( ---------- other : Dataset or DataArray Object against which to broadcast this array. - exclude : iterable of hashable, optional + exclude : iterable of Hashable, optional Dimensions that must not be broadcasted Returns @@ -1512,10 +1558,12 @@ def broadcast_like( dims_map, common_coords = _get_broadcast_dims_map_common_coords(args, exclude) - return _broadcast_helper(args[1], exclude, dims_map, common_coords) + return _broadcast_helper( + cast("T_DataArray", args[1]), exclude, dims_map, common_coords + ) def _reindex_callback( - self, + self: T_DataArray, aligner: alignment.Aligner, dim_pos_indexers: dict[Hashable, Any], variables: dict[Hashable, Variable], @@ -1523,7 +1571,7 @@ def _reindex_callback( fill_value: Any, exclude_dims: frozenset[Hashable], exclude_vars: frozenset[Hashable], - ) -> DataArray: + ) -> T_DataArray: """Callback called from ``Aligner`` to create a new reindexed DataArray.""" if isinstance(fill_value, dict): @@ -1546,13 +1594,13 @@ def _reindex_callback( return self._from_temp_dataset(reindexed) def reindex_like( - self, + self: T_DataArray, other: DataArray | Dataset, - method: str | None = None, + method: ReindexMethodOptions = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value=dtypes.NA, - ) -> DataArray: + ) -> T_DataArray: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. 
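`ReindexMethodOptions` is imported from `xarray/core/types.py`, whose hunk is not shown in this section; judging from the documented choices it is plausibly a Literal union along these lines (the exact members here are an assumption):

from typing import Literal, Optional

ReindexMethodOptions = Optional[
    Literal["nearest", "pad", "ffill", "backfill", "bfill"]
]


def reindex_sketch(method: ReindexMethodOptions = None) -> None:
    # a typo such as method="forwardfill" now fails type checking instead
    # of surfacing as a runtime error inside the indexing machinery
    ...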
@@ -1569,10 +1617,11 @@ def reindex_like( Method to use for filling index values from other not found on this data array: - * None (default): don't fill gaps - * pad / ffill: propagate last valid index value forward - * backfill / bfill: propagate next valid index value backward - * nearest: use nearest valid index value + - None (default): don't fill gaps + - pad / ffill: propagate last valid index value forward + - backfill / bfill: propagate next valid index value backward + - nearest: use nearest valid index value + tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must @@ -1581,7 +1630,7 @@ def reindex_like( to all values, or list-like, which applies variable tolerance per element. List-like must be the same size as the index and its dtype must exactly match the index’s type. - copy : bool, optional + copy : bool, default: True If ``copy=True``, data in the return value is always copied. If ``copy=False`` and reindexing is unnecessary, or can be performed with only slice operations, then the output may share memory with @@ -1612,14 +1661,14 @@ def reindex_like( ) def reindex( - self, + self: T_DataArray, indexers: Mapping[Any, Any] = None, - method: str = None, + method: ReindexMethodOptions = None, tolerance: int | float | Iterable[int | float] | None = None, copy: bool = True, fill_value=dtypes.NA, **indexers_kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Conform this object onto the indexes of another object, filling in missing values with ``fill_value``. The default fill value is NaN. @@ -1640,10 +1689,11 @@ def reindex( Method to use for filling index values in ``indexers`` not found on this data array: - * None (default): don't fill gaps - * pad / ffill: propagate last valid index value forward - * backfill / bfill: propagate next valid index value backward - * nearest: use nearest valid index value + - None (default): don't fill gaps + - pad / ffill: propagate last valid index value forward + - backfill / bfill: propagate next valid index value backward + - nearest: use nearest valid index value + tolerance : optional Maximum distance between original and new labels for inexact matches. The values of the index at the matching locations must @@ -1702,13 +1752,13 @@ def reindex( ) def interp( - self, - coords: Mapping[Any, Any] = None, - method: str = "linear", + self: T_DataArray, + coords: Mapping[Any, Any] | None = None, + method: InterpOptions = "linear", assume_sorted: bool = False, - kwargs: Mapping[str, Any] = None, + kwargs: Mapping[str, Any] | None = None, **coords_kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Multidimensional interpolation of variables. Parameters @@ -1718,16 +1768,17 @@ def interp( New coordinate can be an scalar, array-like or DataArray. If DataArrays are passed as new coordinates, their dimensions are used for the broadcasting. Missing values are skipped. - method : str, default: "linear" + method : {"linear", "nearest", "zero", "slinear", "quadratic", "cubic"}, default: "linear" The method used to interpolate. Choose from - {"linear", "nearest"} for multidimensional array, - {"linear", "nearest", "zero", "slinear", "quadratic", "cubic"} for 1-dimensional array. - assume_sorted : bool, optional + + assume_sorted : bool, default: False If False, values of x can be in any order and they are sorted first. If True, x has to be an array of monotonically increasing values. 
- kwargs : dict + kwargs : dict-like or None, default: None Additional keyword arguments passed to scipy's interpolator. Valid options and their behavior depend on if 1-dimensional or multi-dimensional interpolation is used. @@ -1832,12 +1883,12 @@ def interp( return self._from_temp_dataset(ds) def interp_like( - self, + self: T_DataArray, other: DataArray | Dataset, - method: str = "linear", + method: InterpOptions = "linear", assume_sorted: bool = False, - kwargs: Mapping[str, Any] = None, - ) -> DataArray: + kwargs: Mapping[str, Any] | None = None, + ) -> T_DataArray: """Interpolate this object onto the coordinates of another object, filling out of range values with NaN. @@ -1847,12 +1898,13 @@ def interp_like( Object with an 'indexes' attribute giving a mapping from dimension names to an 1d array-like, which provides coordinates upon which to index the variables in this dataset. Missing values are skipped. - method : str, default: "linear" + method : {"linear", "nearest", "zero", "slinear", "quadratic", "cubic"}, default: "linear" The method used to interpolate. Choose from - {"linear", "nearest"} for multidimensional array, - {"linear", "nearest", "zero", "slinear", "quadratic", "cubic"} for 1-dimensional array. - assume_sorted : bool, optional + + assume_sorted : bool, default: False If False, values of coordinates that are interpolated over can be in any order and they are sorted first. If True, interpolated coordinates are assumed to be an array of monotonically increasing @@ -1887,9 +1939,11 @@ def interp_like( ) return self._from_temp_dataset(ds) + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def rename( self, - new_name_or_name_dict: Hashable | Mapping[Any, Hashable] = None, + new_name_or_name_dict: Hashable | Mapping[Any, Hashable] | None = None, **names: Hashable, ) -> DataArray: """Returns a new DataArray with renamed coordinates or a new name. @@ -1900,7 +1954,7 @@ def rename( If the argument is dict-like, it used as a mapping from old names to new names for coordinates. Otherwise, use the argument as the new name for this array. - **names : hashable, optional + **names : Hashable, optional The keyword arguments form of a mapping from old names to new names for coordinates. One of new_name_or_name_dict or names must be provided. @@ -1927,8 +1981,10 @@ def rename( return self._replace(name=new_name_or_name_dict) def swap_dims( - self, dims_dict: Mapping[Any, Hashable] = None, **dims_kwargs - ) -> DataArray: + self: T_DataArray, + dims_dict: Mapping[Any, Hashable] | None = None, + **dims_kwargs, + ) -> T_DataArray: """Returns a new DataArray with swapped dimensions. Parameters @@ -1983,10 +2039,12 @@ def swap_dims( ds = self._to_temp_dataset().swap_dims(dims_dict) return self._from_temp_dataset(ds) + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def expand_dims( self, dim: None | Hashable | Sequence[Hashable] | Mapping[Any, Any] = None, - axis=None, + axis: None | int | Sequence[int] = None, **dim_kwargs: Any, ) -> DataArray: """Return a new object with an additional axis (or axes) inserted at @@ -1998,16 +2056,16 @@ def expand_dims( Parameters ---------- - dim : hashable, sequence of hashable, dict, or None, optional + dim : Hashable, sequence of Hashable, dict, or None, optional Dimensions to include on the new variable. If provided as str or sequence of str, then dimensions are inserted with length 1. 
            If provided as a dict, then the keys are the new dimensions
             and the values are either integers (giving the length of the new
             dimensions) or sequence/ndarray (giving the coordinates of the new
             dimensions).
-        axis : int, list of int or tuple of int, or None, default: None
+        axis : int, sequence of int, or None, default: None
             Axis position(s) where new axis is to be inserted (position(s) on
-            the result array). If a list (or tuple) of integers is passed,
+            the result array). If a sequence of integers is passed,
             multiple axes are inserted. In this case, dim arguments should be
             same length list. If axis=None is passed, all the axes will be
             inserted to the start of the result array.
@@ -2019,11 +2077,11 @@ def expand_dims(

         Returns
         -------
-        expanded : same type as caller
-            This object, but with an additional dimension(s).
+        expanded : DataArray
+            This object, but with additional dimension(s).
         """
         if isinstance(dim, int):
-            raise TypeError("dim should be hashable or sequence/mapping of hashables")
+            raise TypeError("dim should be Hashable or sequence/mapping of Hashables")
         elif isinstance(dim, Sequence) and not isinstance(dim, str):
             if len(dim) != len(set(dim)):
                 raise ValueError("dims should not contain duplicate values.")
@@ -2035,6 +2093,8 @@ def expand_dims(
         ds = self._to_temp_dataset().expand_dims(dim, axis)
         return self._from_temp_dataset(ds)

+    # change type of self and return to T_DataArray once
+    # https://github.com/python/mypy/issues/12846 is resolved
     def set_index(
         self,
         indexes: Mapping[Any, Hashable | Sequence[Hashable]] = None,
@@ -2050,9 +2110,9 @@ def set_index(
             Mapping from names matching dimensions and values given by
             (lists of) the names of existing coordinates or variables to set
             as new (multi-)index.
-        append : bool, optional
+        append : bool, default: False
             If True, append the supplied index(es) to the existing index(es).
-            Otherwise replace the existing index(es) (default).
+            Otherwise replace the existing index(es).
         **indexes_kwargs : optional
             The keyword arguments form of ``indexes``.
             One of indexes or indexes_kwargs must be provided.
@@ -2092,6 +2152,8 @@ def set_index(
         ds = self._to_temp_dataset().set_index(indexes, append=append, **indexes_kwargs)
         return self._from_temp_dataset(ds)

+    # change type of self and return to T_DataArray once
+    # https://github.com/python/mypy/issues/12846 is resolved
     def reset_index(
         self,
         dims_or_levels: Hashable | Sequence[Hashable],
@@ -2101,10 +2163,10 @@ def reset_index(

         Parameters
         ----------
-        dims_or_levels : hashable or sequence of hashable
+        dims_or_levels : Hashable or sequence of Hashable
             Name(s) of the dimension(s) and/or multi-index level(s) that will
             be reset.
-        drop : bool, optional
+        drop : bool, default: False
             If True, remove the specified indexes and/or multi-index levels
             instead of extracting them as new coordinates (default: False).

@@ -2122,15 +2184,15 @@ def reset_index(
         return self._from_temp_dataset(ds)

     def reorder_levels(
-        self,
-        dim_order: Mapping[Any, Sequence[int]] = None,
-        **dim_order_kwargs: Sequence[int],
-    ) -> DataArray:
+        self: T_DataArray,
+        dim_order: Mapping[Any, Sequence[int | Hashable]] | None = None,
+        **dim_order_kwargs: Sequence[int | Hashable],
+    ) -> T_DataArray:
         """Rearrange index levels using input order.

         Parameters
         ----------
-        dim_order : optional
+        dim_order : dict-like of Hashable to sequence of int or Hashable, optional
            Mapping from names matching dimensions and values given by
            lists representing new level orders. Every given dimension
            must have a multi-index.
@@ -2148,12 +2210,12 @@ def reorder_levels( return self._from_temp_dataset(ds) def stack( - self, - dimensions: Mapping[Any, Sequence[Hashable]] = None, - create_index: bool = True, + self: T_DataArray, + dimensions: Mapping[Any, Sequence[Hashable]] | None = None, + create_index: bool | None = True, index_cls: type[Index] = PandasMultiIndex, **dimensions_kwargs: Sequence[Hashable], - ) -> DataArray: + ) -> T_DataArray: """ Stack any number of existing dimensions into a single new dimension. @@ -2162,14 +2224,14 @@ def stack( Parameters ---------- - dimensions : mapping of hashable to sequence of hashable + dimensions : mapping of Hashable to sequence of Hashable Mapping of the form `new_name=(dim1, dim2, ...)`. Names of new dimensions, and the existing dimensions that they replace. An ellipsis (`...`) will be replaced by all unlisted dimensions. Passing a list containing an ellipsis (`stacked_dim=[...]`) will stack over all dimensions. - create_index : bool, optional - If True (default), create a multi-index for each of the stacked dimensions. + create_index : bool or None, default: True + If True, create a multi-index for each of the stacked dimensions. If False, don't create any index. If None, create a multi-index only if exactly one single (1-d) coordinate index is found for every dimension to stack. @@ -2220,6 +2282,8 @@ def stack( ) return self._from_temp_dataset(ds) + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def unstack( self, dim: Hashable | Sequence[Hashable] | None = None, @@ -2234,16 +2298,16 @@ def unstack( Parameters ---------- - dim : hashable or sequence of hashable, optional + dim : Hashable or sequence of Hashable, optional Dimension(s) over which to unstack. By default unstacks all MultiIndexes. fill_value : scalar or dict-like, default: nan - value to be filled. If a dict-like, maps variable names to + Value to be filled. If a dict-like, maps variable names to fill values. Use the data array's name to refer to its name. If not provided or if the dict-like does not contain all variables, the dtype's NA value will be used. sparse : bool, default: False - use sparse-array if True + Use sparse-array if True Returns ------- @@ -2283,7 +2347,7 @@ def unstack( ds = self._to_temp_dataset().unstack(dim, fill_value, sparse) return self._from_temp_dataset(ds) - def to_unstacked_dataset(self, dim, level=0): + def to_unstacked_dataset(self, dim: Hashable, level: int | Hashable = 0) -> Dataset: """Unstack DataArray expanding to Dataset along a given level of a stacked coordinate. @@ -2291,9 +2355,9 @@ def to_unstacked_dataset(self, dim, level=0): Parameters ---------- - dim : str + dim : Hashable Name of existing dimension to unstack - level : int or str + level : int or Hashable, default: 0 The MultiIndex level to expand to a dataset along. Can either be the integer index of the level or its name. @@ -2349,16 +2413,16 @@ def to_unstacked_dataset(self, dim, level=0): return Dataset(data_dict) def transpose( - self, + self: T_DataArray, *dims: Hashable, transpose_coords: bool = True, missing_dims: ErrorOptionsWithWarn = "raise", - ) -> DataArray: + ) -> T_DataArray: """Return a new DataArray object with transposed dimensions. Parameters ---------- - *dims : hashable, optional + *dims : Hashable, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. 
transpose_coords : bool, default: True @@ -2399,17 +2463,22 @@ def transpose( return self._replace(variable) @property - def T(self) -> DataArray: + def T(self: T_DataArray) -> T_DataArray: return self.transpose() + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def drop_vars( - self, names: Hashable | Iterable[Hashable], *, errors: ErrorOptions = "raise" + self, + names: Hashable | Iterable[Hashable], + *, + errors: ErrorOptions = "raise", ) -> DataArray: """Returns an array with dropped variables. Parameters ---------- - names : hashable or iterable of hashable + names : Hashable or iterable of Hashable Name(s) of variables to drop. errors : {"raise", "ignore"}, default: "raise" If 'raise', raises a ValueError error if any of the variable @@ -2425,13 +2494,13 @@ def drop_vars( return self._from_temp_dataset(ds) def drop( - self, - labels: Mapping = None, - dim: Hashable = None, + self: T_DataArray, + labels: Mapping[Any, Any] | None = None, + dim: Hashable | None = None, *, errors: ErrorOptions = "raise", **labels_kwargs, - ) -> DataArray: + ) -> T_DataArray: """Backward compatible method based on `drop_vars` and `drop_sel` Using either `drop_vars` or `drop_sel` is encouraged @@ -2441,21 +2510,21 @@ def drop( DataArray.drop_vars DataArray.drop_sel """ - ds = self._to_temp_dataset().drop(labels, dim, errors=errors) + ds = self._to_temp_dataset().drop(labels, dim, errors=errors, **labels_kwargs) return self._from_temp_dataset(ds) def drop_sel( - self, - labels: Mapping[Any, Any] = None, + self: T_DataArray, + labels: Mapping[Any, Any] | None = None, *, errors: ErrorOptions = "raise", **labels_kwargs, - ) -> DataArray: + ) -> T_DataArray: """Drop index labels from this DataArray. Parameters ---------- - labels : mapping of hashable to Any + labels : mapping of Hashable to Any Index labels to drop errors : {"raise", "ignore"}, default: "raise" If 'raise', raises a ValueError error if @@ -2475,12 +2544,14 @@ def drop_sel( ds = self._to_temp_dataset().drop_sel(labels, errors=errors) return self._from_temp_dataset(ds) - def drop_isel(self, indexers=None, **indexers_kwargs): + def drop_isel( + self: T_DataArray, indexers: Mapping[Any, Any] | None = None, **indexers_kwargs + ) -> T_DataArray: """Drop index positions from this DataArray. Parameters ---------- - indexers : mapping of hashable to Any + indexers : mapping of Hashable to Any or None, default: None Index locations to drop **indexers_kwargs : {dim: position, ...}, optional The keyword arguments form of ``dim`` and ``positions`` @@ -2497,29 +2568,35 @@ def drop_isel(self, indexers=None, **indexers_kwargs): dataset = dataset.drop_isel(indexers=indexers, **indexers_kwargs) return self._from_temp_dataset(dataset) - def dropna(self, dim: Hashable, how: str = "any", thresh: int = None) -> DataArray: + def dropna( + self: T_DataArray, + dim: Hashable, + how: Literal["any", "all"] = "any", + thresh: int | None = None, + ) -> T_DataArray: """Returns a new array with dropped labels for missing values along the provided dimension. Parameters ---------- - dim : hashable + dim : Hashable Dimension along which to drop missing values. Dropping along multiple dimensions simultaneously is not yet supported. 
- how : {"any", "all"}, optional - * any : if any NA values are present, drop that label - * all : if all values are NA, drop that label - thresh : int, default: None + how : {"any", "all"}, default: "any" + - any : if any NA values are present, drop that label + - all : if all values are NA, drop that label + + thresh : int or None, default: None If supplied, require this many non-NA values. Returns ------- - DataArray + dropped : DataArray """ ds = self._to_temp_dataset().dropna(dim, how=how, thresh=thresh) return self._from_temp_dataset(ds) - def fillna(self, value: Any) -> DataArray: + def fillna(self: T_DataArray, value: Any) -> T_DataArray: """Fill missing values in this object. This operation follows the normal broadcasting and alignment rules that @@ -2536,7 +2613,7 @@ def fillna(self, value: Any) -> DataArray: Returns ------- - DataArray + filled : DataArray """ if utils.is_dict_like(value): raise TypeError( @@ -2547,27 +2624,34 @@ def fillna(self, value: Any) -> DataArray: return out def interpolate_na( - self, - dim: Hashable = None, - method: str = "linear", - limit: int = None, + self: T_DataArray, + dim: Hashable | None = None, + method: InterpAllOptions = "linear", + limit: int | None = None, use_coordinate: bool | str = True, max_gap: ( - int | float | str | pd.Timedelta | np.timedelta64 | datetime.timedelta + None + | int + | float + | str + | pd.Timedelta + | np.timedelta64 + | datetime.timedelta ) = None, - keep_attrs: bool = None, + keep_attrs: bool | None = None, **kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Fill in NaNs by interpolating according to different methods. Parameters ---------- dim : str Specifies the dimension along which to interpolate. - method : str, optional + method : {"linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial", \ + "barycentric", "krog", "pchip", "spline", "akima"}, default: "linear" String indicating which method to use for interpolation: - - 'linear': linear interpolation (Default). Additional keyword + - 'linear': linear interpolation. Additional keyword arguments are passed to :py:func:`numpy.interp` - 'nearest', 'zero', 'slinear', 'quadratic', 'cubic', 'polynomial': are passed to :py:func:`scipy.interpolate.interp1d`. If @@ -2575,13 +2659,14 @@ def interpolate_na( provided. - 'barycentric', 'krog', 'pchip', 'spline', 'akima': use their respective :py:class:`scipy.interpolate` classes. + use_coordinate : bool or str, default: True Specifies which index to use as the x values in the interpolation formulated as `y = f(x)`. If False, values are treated as if eqaully-spaced along ``dim``. If True, the IndexVariable `dim` is used. If ``use_coordinate`` is a string, it specifies the name of a coordinate variariable to use as the index. - limit : int, default: None + limit : int or None, default: None Maximum number of consecutive NaNs to fill. Must be greater than 0 or None for no limit. This filling is done regardless of the size of the gap in the data. To only interpolate over gaps less than a given length, @@ -2609,7 +2694,7 @@ def interpolate_na( * x (x) int64 0 1 2 3 4 5 6 7 8 The gap lengths are 3-0 = 3; 6-3 = 3; and 8-6 = 2 respectively - keep_attrs : bool, default: True + keep_attrs : bool or None, default: None If True, the dataarray's attributes (`attrs`) will be copied from the original object to the new one. If False, the new object will be returned without attributes. 
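The `dropna` change above is the simplest instance of this typing strategy: replacing `how: str` with `Literal["any", "all"]` documents the valid values in the signature itself. A small runnable illustration (the function body is a trivial stand-in):

from typing import Literal


def dropna_sketch(how: Literal["any", "all"] = "any") -> str:
    # with plain `str`, how="most" would only fail at runtime; with the
    # Literal union, mypy rejects it during type checking
    return how


dropna_sketch("all")  # accepted
# dropna_sketch("most")  # flagged by mypy: not a valid Literal member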
@@ -2662,17 +2747,19 @@ def interpolate_na( **kwargs, ) - def ffill(self, dim: Hashable, limit: int = None) -> DataArray: + def ffill( + self: T_DataArray, dim: Hashable, limit: int | None = None + ) -> T_DataArray: """Fill NaN values by propagating values forward *Requires bottleneck.* Parameters ---------- - dim : hashable + dim : Hashable Specifies the dimension along which to propagate values when filling. - limit : int, default: None + limit : int or None, default: None The maximum number of consecutive NaN values to forward fill. In other words, if there is a gap with more than this number of consecutive NaNs, it will only be partially filled. Must be greater @@ -2681,13 +2768,15 @@ def ffill(self, dim: Hashable, limit: int = None) -> DataArray: Returns ------- - DataArray + filled : DataArray """ from .missing import ffill return ffill(self, dim, limit=limit) - def bfill(self, dim: Hashable, limit: int = None) -> DataArray: + def bfill( + self: T_DataArray, dim: Hashable, limit: int | None = None + ) -> T_DataArray: """Fill NaN values by propagating values backward *Requires bottleneck.* @@ -2697,7 +2786,7 @@ def bfill(self, dim: Hashable, limit: int = None) -> DataArray: dim : str Specifies the dimension along which to propagate values when filling. - limit : int, default: None + limit : int or None, default: None The maximum number of consecutive NaN values to backward fill. In other words, if there is a gap with more than this number of consecutive NaNs, it will only be partially filled. Must be greater @@ -2706,13 +2795,13 @@ def bfill(self, dim: Hashable, limit: int = None) -> DataArray: Returns ------- - DataArray + filled : DataArray """ from .missing import bfill return bfill(self, dim, limit=limit) - def combine_first(self, other: DataArray) -> DataArray: + def combine_first(self: T_DataArray, other: T_DataArray) -> T_DataArray: """Combine two DataArray objects, with union of coordinates. This operation follows the normal broadcasting and alignment rules of @@ -2731,15 +2820,15 @@ def combine_first(self, other: DataArray) -> DataArray: return ops.fillna(self, other, join="outer") def reduce( - self, + self: T_DataArray, func: Callable[..., Any], dim: None | Hashable | Sequence[Hashable] = None, *, axis: None | int | Sequence[int] = None, - keep_attrs: bool = None, + keep_attrs: bool | None = None, keepdims: bool = False, **kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Reduce this array by applying `func` along some dimension(s). Parameters @@ -2748,14 +2837,14 @@ def reduce( Function which can be called in the form `f(x, axis=axis, **kwargs)` to return the result of reducing an np.ndarray over an integer valued axis. - dim : hashable or sequence of hashable, optional + dim : Hashable or sequence of Hashable, optional Dimension(s) over which to apply `func`. axis : int or sequence of int, optional Axis(es) over which to repeatedly apply `func`. Only one of the 'dim' and 'axis' arguments can be supplied. If neither are supplied, then the reduction is calculated over the flattened array (by calling `f(x)` without an axis argument). - keep_attrs : bool, optional + keep_attrs : bool or None, optional If True, the variable's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. @@ -2789,6 +2878,11 @@ def to_pandas(self) -> DataArray | pd.Series | pd.DataFrame: Only works for arrays with 2 or fewer dimensions. The DataArray constructor performs the inverse transformation. 
+
+        Returns
+        -------
+        result : DataArray | Series | DataFrame
+            DataArray, pandas Series or pandas DataFrame.
         """
         # TODO: consolidate the info about pandas constructors and the
         # attributes that correspond to their indexes into a separate module?
@@ -2797,14 +2891,14 @@ def to_pandas(self) -> DataArray | pd.Series | pd.DataFrame:
             constructor = constructors[self.ndim]
         except KeyError:
             raise ValueError(
-                f"cannot convert arrays with {self.ndim} dimensions into "
-                "pandas objects"
+                f"Cannot convert arrays with {self.ndim} dimensions into "
+                "pandas objects. Requires 2 or fewer dimensions."
             )
         indexes = [self.get_index(dim) for dim in self.dims]
         return constructor(self.values, *indexes)

     def to_dataframe(
-        self, name: Hashable = None, dim_order: list[Hashable] = None
+        self, name: Hashable | None = None, dim_order: Sequence[Hashable] | None = None
     ) -> pd.DataFrame:
         """Convert this array and its coordinates into a tidy pandas.DataFrame.

@@ -2817,9 +2911,9 @@ def to_dataframe(

         Parameters
         ----------
-        name
+        name : Hashable or None, optional
             Name to give to this array (required if unnamed).
-        dim_order
+        dim_order : Sequence of Hashable or None, optional
             Hierarchical dimension order for the resulting dataframe.
             Array content is transposed to this order and then written out
             as flat vectors in contiguous order, so the last dimension in
             this list
@@ -2832,12 +2926,13 @@

         Returns
         -------
-        result
+        result : DataFrame
             DataArray as a pandas DataFrame.

         See also
         --------
         DataArray.to_pandas
+        DataArray.to_series
         """
         if name is None:
             name = self.name
@@ -2871,6 +2966,16 @@ def to_series(self) -> pd.Series:

         The Series is indexed by the Cartesian product of index coordinates
         (in the form of a :py:class:`pandas.MultiIndex`).
+
+        Returns
+        -------
+        result : Series
+            DataArray as a pandas Series.
+
+        See also
+        --------
+        DataArray.to_pandas
+        DataArray.to_dataframe
         """
         index = self.coords.to_index()
         return pd.Series(self.values.reshape(-1), index=index, name=self.name)
@@ -2958,7 +3063,7 @@ def to_netcdf(

         Parameters
         ----------
-        path : str, path-like or file-like, optional
+        path : str, path-like or None, optional
             Path to which to save this dataset. File-like objects are only
             supported by the scipy engine. If no path is provided, this
             function returns the resulting netCDF file as bytes; in this case,
@@ -3008,7 +3113,7 @@ def to_netcdf(
             This allows using any compression plugin installed in the HDF5
             library, e.g. LZF.

-        unlimited_dims : iterable of hashable, optional
+        unlimited_dims : iterable of Hashable, optional
             Dimension(s) that should be serialized as unlimited dimensions.
             By default, no dimensions are treated as unlimited dimensions.
             Note that unlimited_dims may also be set via
@@ -3023,6 +3128,7 @@ def to_netcdf(

         Returns
         -------
+        store : bytes or Delayed or None
             * ``bytes`` if path is None
             * ``dask.delayed.Delayed`` if compute is False
             * None otherwise
@@ -3067,7 +3173,7 @@ def to_netcdf(
             invalid_netcdf=invalid_netcdf,
         )

-    def to_dict(self, data: bool = True, encoding: bool = False) -> dict:
+    def to_dict(self, data: bool = True, encoding: bool = False) -> dict[str, Any]:
         """
         Convert this xarray.DataArray into a dictionary following xarray
         naming conventions.

@@ -3078,12 +3184,16 @@ def to_dict(self, data: bool = True, encoding: bool = False) -> dict:

         Parameters
         ----------
-        data : bool, optional
+        data : bool, default: True
             Whether to include the actual data in the dictionary.
             When set to False, returns just the schema.
-        encoding : bool, optional
+        encoding : bool, default: False
             Whether to include the Dataset's encoding in the dictionary.

+        Returns
+        -------
+        dict : dict
+
         See Also
         --------
         DataArray.from_dict
@@ -3098,7 +3208,7 @@ def to_dict(self, data: bool = True, encoding: bool = False) -> dict:
         return d

     @classmethod
-    def from_dict(cls, d: dict) -> DataArray:
+    def from_dict(cls: type[T_DataArray], d: Mapping[str, Any]) -> T_DataArray:
         """Convert a dictionary into an xarray.DataArray

         Parameters
@@ -3174,12 +3284,18 @@ def from_series(cls, series: pd.Series, sparse: bool = False) -> DataArray:
         values with NaN). Thus this operation should be the inverse of the
         `to_series` method.

-        If sparse=True, creates a sparse array instead of a dense NumPy array.
-        Requires the pydata/sparse package.
+        Parameters
+        ----------
+        series : Series
+            Pandas Series object to convert.
+        sparse : bool, default: False
+            If sparse=True, creates a sparse array instead of a dense NumPy array.
+            Requires the pydata/sparse package.

         See Also
         --------
-        xarray.Dataset.from_dataframe
+        DataArray.to_series
+        Dataset.from_dataframe
         """
         temp_name = "__temporary_name"
         df = pd.DataFrame({temp_name: series})
@@ -3214,7 +3330,7 @@ def from_iris(cls, cube: iris_Cube) -> DataArray:

         return from_iris(cube)

-    def _all_compat(self, other: DataArray, compat_str: str) -> bool:
+    def _all_compat(self: T_DataArray, other: T_DataArray, compat_str: str) -> bool:
         """Helper function for equals, broadcast_equals, and identical"""

         def compat(x, y):
@@ -3224,11 +3340,21 @@ def compat(x, y):
             self, other
         )

-    def broadcast_equals(self, other: DataArray) -> bool:
+    def broadcast_equals(self: T_DataArray, other: T_DataArray) -> bool:
         """Two DataArrays are broadcast equal if they are equal after
         broadcasting them against each other such that they have the same
         dimensions.

+        Parameters
+        ----------
+        other : DataArray
+            DataArray to compare to.
+
+        Returns
+        -------
+        equal : bool
+            True if the two DataArrays are broadcast equal.
+
         See Also
         --------
         DataArray.equals
@@ -3239,7 +3365,7 @@ def broadcast_equals(self, other: DataArray) -> bool:
         except (TypeError, AttributeError):
             return False

-    def equals(self, other: DataArray) -> bool:
+    def equals(self: T_DataArray, other: T_DataArray) -> bool:
         """True if two DataArrays have the same dimensions, coordinates and
         values; otherwise False.

@@ -3249,6 +3375,16 @@ def equals(self, other: DataArray) -> bool:
         This method is necessary because `v1 == v2` for ``DataArray``
         does element-wise comparisons (like numpy.ndarrays).

+        Parameters
+        ----------
+        other : DataArray
+            DataArray to compare to.
+
+        Returns
+        -------
+        equal : bool
+            True if the two DataArrays are equal.
+
         See Also
         --------
         DataArray.broadcast_equals
@@ -3259,10 +3395,20 @@ def equals(self, other: DataArray) -> bool:
         except (TypeError, AttributeError):
             return False

-    def identical(self, other: DataArray) -> bool:
+    def identical(self: T_DataArray, other: T_DataArray) -> bool:
         """Like equals, but also checks the array name and attributes, and
         attributes on all coordinates.

+        Parameters
+        ----------
+        other : DataArray
+            DataArray to compare to.
+
+        Returns
+        -------
+        equal : bool
+            True if the two DataArrays are identical.
+ See Also -------- DataArray.broadcast_equals @@ -3282,19 +3428,19 @@ def _result_name(self, other: Any = None) -> Hashable | None: else: return None - def __array_wrap__(self, obj, context=None) -> DataArray: + def __array_wrap__(self: T_DataArray, obj, context=None) -> T_DataArray: new_var = self.variable.__array_wrap__(obj, context) return self._replace(new_var) - def __matmul__(self, obj): + def __matmul__(self: T_DataArray, obj: T_DataArray) -> T_DataArray: return self.dot(obj) - def __rmatmul__(self, other): + def __rmatmul__(self: T_DataArray, other: T_DataArray) -> T_DataArray: # currently somewhat duplicative, as only other DataArrays are # compatible with matmul return computation.dot(other, self) - def _unary_op(self, f: Callable, *args, **kwargs): + def _unary_op(self: T_DataArray, f: Callable, *args, **kwargs) -> T_DataArray: keep_attrs = kwargs.pop("keep_attrs", None) if keep_attrs is None: keep_attrs = _get_keep_attrs(default=True) @@ -3310,16 +3456,16 @@ def _unary_op(self, f: Callable, *args, **kwargs): return da def _binary_op( - self, - other, + self: T_DataArray, + other: Any, f: Callable, reflexive: bool = False, - ): + ) -> T_DataArray: if isinstance(other, (Dataset, groupby.GroupBy)): return NotImplemented if isinstance(other, DataArray): align_type = OPTIONS["arithmetic_join"] - self, other = align(self, other, join=align_type, copy=False) + self, other = align(self, other, join=align_type, copy=False) # type: ignore other_variable = getattr(other, "variable", other) other_coords = getattr(other, "coords", None) @@ -3333,7 +3479,7 @@ def _binary_op( return self._replace(variable, coords, name, indexes=indexes) - def _inplace_binary_op(self, other, f: Callable): + def _inplace_binary_op(self: T_DataArray, other: Any, f: Callable) -> T_DataArray: if isinstance(other, groupby.GroupBy): raise TypeError( "in-place operations between a DataArray and " @@ -3394,16 +3540,18 @@ def _title_for_slice(self, truncate: int = 50) -> str: return title - def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> DataArray: + def diff( + self: T_DataArray, dim: Hashable, n: int = 1, label: Hashable = "upper" + ) -> T_DataArray: """Calculate the n-th order discrete difference along given axis. Parameters ---------- - dim : hashable + dim : Hashable Dimension over which to calculate the finite difference. - n : int, optional + n : int, default: 1 The number of times values are differenced. - label : hashable, optional + label : Hashable, default: "upper" The new coordinate in dimension ``dim`` will have the values of either the minuend's or subtrahend's coordinate for values 'upper' and 'lower', respectively. Other @@ -3411,7 +3559,7 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> DataArra Returns ------- - difference : same type as caller + difference : DataArray The n-th order finite difference of this object. Notes @@ -3441,11 +3589,11 @@ def diff(self, dim: Hashable, n: int = 1, label: Hashable = "upper") -> DataArra return self._from_temp_dataset(ds) def shift( - self, - shifts: Mapping[Any, int] = None, + self: T_DataArray, + shifts: Mapping[Any, int] | None = None, fill_value: Any = dtypes.NA, **shifts_kwargs: int, - ) -> DataArray: + ) -> T_DataArray: """Shift this DataArray by an offset along one or more dimensions. Only the data is moved; coordinates stay in place. 
This is consistent @@ -3457,7 +3605,7 @@ def shift( Parameters ---------- - shifts : mapping of hashable to int, optional + shifts : mapping of Hashable to int or None, optional Integer offset to shift along each of the given dimensions. Positive offsets shift to the right; negative offsets shift to the left. @@ -3491,11 +3639,11 @@ def shift( return self._replace(variable=variable) def roll( - self, - shifts: Mapping[Hashable, int] = None, + self: T_DataArray, + shifts: Mapping[Hashable, int] | None = None, roll_coords: bool = False, **shifts_kwargs: int, - ) -> DataArray: + ) -> T_DataArray: """Roll this array by an offset along one or more dimensions. Unlike shift, roll treats the given dimensions as periodic, so will not @@ -3507,7 +3655,7 @@ def roll( Parameters ---------- - shifts : mapping of hashable to int, optional + shifts : mapping of Hashable to int, optional Integer offset to rotate each of the given dimensions. Positive offsets roll to the right; negative offsets roll to the left. @@ -3540,15 +3688,17 @@ def roll( return self._from_temp_dataset(ds) @property - def real(self) -> DataArray: + def real(self: T_DataArray) -> T_DataArray: return self._replace(self.variable.real) @property - def imag(self) -> DataArray: + def imag(self: T_DataArray) -> T_DataArray: return self._replace(self.variable.imag) def dot( - self, other: T_DataArray, dims: Hashable | Sequence[Hashable] | None = None + self: T_DataArray, + other: T_DataArray, + dims: Hashable | Sequence[Hashable] | None = None, ) -> T_DataArray: """Perform dot product of two DataArrays along their shared dims. @@ -3558,7 +3708,7 @@ def dot( ---------- other : DataArray The other array with which the dot product is performed. - dims : ..., hashable or sequence of hashable, optional + dims : ..., Hashable or sequence of Hashable, optional Which dimensions to sum over. Ellipsis (`...`) sums over all dimensions. If not specified, then all the common dimensions are summed over. @@ -3599,6 +3749,8 @@ def dot( return computation.dot(self, other, dims=dims) + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def sortby( self, variables: Hashable | DataArray | Sequence[Hashable | DataArray], @@ -3622,10 +3774,10 @@ def sortby( Parameters ---------- - variables : hashable, DataArray, or sequence of hashable or DataArray + variables : Hashable, DataArray, or sequence of Hashable or DataArray 1D DataArray objects or name(s) of 1D variable(s) in coords whose values are used to sort this array. - ascending : bool, optional + ascending : bool, default: True Whether to sort by ascending or descending order. Returns @@ -3664,14 +3816,14 @@ def sortby( return self._from_temp_dataset(ds) def quantile( - self, + self: T_DataArray, q: ArrayLike, dim: str | Sequence[Hashable] | None = None, method: QUANTILE_METHODS = "linear", - keep_attrs: bool = None, - skipna: bool = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, interpolation: QUANTILE_METHODS = None, - ) -> DataArray: + ) -> T_DataArray: """Compute the qth quantile of the data along the specified dimension. Returns the qth quantiles(s) of the array elements. @@ -3680,7 +3832,7 @@ def quantile( ---------- q : float or array-like of float Quantile to compute, which must be between 0 and 1 inclusive. - dim : hashable or sequence of hashable, optional + dim : Hashable or sequence of Hashable, optional Dimension(s) over which to apply quantile. 
method : str, default: "linear" This optional parameter specifies the interpolation method to use when the @@ -3710,11 +3862,11 @@ def quantile( previously called "interpolation", renamed in accordance with numpy version 1.22.0. - keep_attrs : bool, optional + keep_attrs : bool or None, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. - skipna : bool, optional + skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been @@ -3783,8 +3935,11 @@ def quantile( return self._from_temp_dataset(ds) def rank( - self, dim: Hashable, pct: bool = False, keep_attrs: bool = None - ) -> DataArray: + self: T_DataArray, + dim: Hashable, + pct: bool = False, + keep_attrs: bool | None = None, + ) -> T_DataArray: """Ranks the data. Equal values are assigned a rank that is the average of the ranks that @@ -3797,11 +3952,11 @@ def rank( Parameters ---------- - dim : hashable + dim : Hashable Dimension over which to compute rank. - pct : bool, optional + pct : bool, default: False If True, compute percentage ranks, otherwise compute integer ranks. - keep_attrs : bool, optional + keep_attrs : bool or None, optional If True, the dataset's attributes (`attrs`) will be copied from the original object to the new one. If False (default), the new object will be returned without attributes. @@ -3824,8 +3979,11 @@ def rank( return self._from_temp_dataset(ds) def differentiate( - self, coord: Hashable, edge_order: int = 1, datetime_unit: str = None - ) -> DataArray: + self: T_DataArray, + coord: Hashable, + edge_order: Literal[1, 2] = 1, + datetime_unit: DatetimeUnitOptions | None = None, + ) -> T_DataArray: """ Differentiate the array with the second order accurate central differences. @@ -3835,7 +3993,7 @@ def differentiate( Parameters ---------- - coord : hashable + coord : Hashable The coordinate to be used to compute the gradient. edge_order : {1, 2}, default: 1 N-th order accurate differences at the boundaries. @@ -3882,10 +4040,12 @@ def differentiate( ds = self._to_temp_dataset().differentiate(coord, edge_order, datetime_unit) return self._from_temp_dataset(ds) + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def integrate( self, coord: Hashable | Sequence[Hashable] = None, - datetime_unit: str = None, + datetime_unit: DatetimeUnitOptions | None = None, ) -> DataArray: """Integrate along the given coordinate using the trapezoidal rule. @@ -3895,7 +4055,7 @@ def integrate( Parameters ---------- - coord : hashable, or sequence of hashable + coord : Hashable, or sequence of Hashable Coordinate(s) used for the integration. datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as'}, optional @@ -3936,10 +4096,12 @@ def integrate( ds = self._to_temp_dataset().integrate(coord, datetime_unit) return self._from_temp_dataset(ds) + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def cumulative_integrate( self, coord: Hashable | Sequence[Hashable] = None, - datetime_unit: str = None, + datetime_unit: DatetimeUnitOptions | None = None, ) -> DataArray: """Integrate cumulatively along the given coordinate using the trapezoidal rule. 
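`differentiate` now constrains `edge_order` to `Literal[1, 2]` and `datetime_unit` to the `DatetimeUnitOptions` alias from `types.py`. The alias definition is not part of this section; the sketch below assumes members matching the units listed in the docstring above:

from typing import Literal, Optional

DatetimeUnitOptions = Literal[
    "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as"
]


def differentiate_sketch(
    coord: str,
    edge_order: Literal[1, 2] = 1,
    datetime_unit: Optional[DatetimeUnitOptions] = None,
) -> None:
    # Literal[1, 2] turns edge_order=3 into a static type error instead of
    # a runtime failure deep inside numpy.gradient
    ...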
@@ -3952,7 +4114,7 @@ def cumulative_integrate( Parameters ---------- - coord : hashable, or sequence of hashable + coord : Hashable, or sequence of Hashable Coordinate(s) used for the integration. datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as'}, optional @@ -4124,8 +4286,8 @@ def polyfit( rcond: float | None = None, w: Hashable | Any | None = None, full: bool = False, - cov: bool = False, - ): + cov: bool | Literal["unscaled"] = False, + ) -> Dataset: """ Least squares polynomial fit. @@ -4134,23 +4296,23 @@ def polyfit( Parameters ---------- - dim : hashable + dim : Hashable Coordinate along which to fit the polynomials. deg : int Degree of the fitting polynomial. - skipna : bool, optional + skipna : bool or None, optional If True, removes all invalid values before fitting each 1D slices of the array. Default is True if data is stored in a dask.array or if there is any invalid values, False otherwise. - rcond : float, optional + rcond : float or None, optional Relative condition number to the fit. - w : hashable or array-like, optional + w : Hashable, array-like or None, optional Weights to apply to the y-coordinate of the sample points. Can be an array-like object or the name of a coordinate in the dataset. - full : bool, optional + full : bool, default: False Whether to return the residuals, matrix rank and singular values in addition to the coefficients. - cov : bool or str, optional + cov : bool or "unscaled", default: False Whether to return to the covariance matrix in addition to the coefficients. The matrix is not scaled if `cov='unscaled'`. @@ -4182,19 +4344,21 @@ def polyfit( ) def pad( - self, + self: T_DataArray, pad_width: Mapping[Any, int | tuple[int, int]] | None = None, - mode: str = "constant", + mode: PadModeOptions = "constant", stat_length: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, - constant_values: (int | tuple[int, int] | Mapping[Any, tuple[int, int]]) + constant_values: float + | tuple[float, float] + | Mapping[Any, tuple[float, float]] | None = None, end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, - reflect_type: str | None = None, + reflect_type: PadReflectOptions = None, **pad_width_kwargs: Any, - ) -> DataArray: + ) -> T_DataArray: """Pad this array along one or more dimensions. .. warning:: @@ -4207,44 +4371,35 @@ def pad( Parameters ---------- - pad_width : mapping of hashable to tuple of int + pad_width : mapping of Hashable to tuple of int Mapping with the form of {dim: (pad_before, pad_after)} describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad - mode : str, default: "constant" - One of the following string values (taken from numpy docs) - - 'constant' (default) - Pads with a constant value. - 'edge' - Pads with the edge values of array. - 'linear_ramp' - Pads with the linear ramp between end_value and the - array edge value. - 'maximum' - Pads with the maximum value of all or part of the - vector along each axis. - 'mean' - Pads with the mean value of all or part of the - vector along each axis. - 'median' - Pads with the median value of all or part of the - vector along each axis. - 'minimum' - Pads with the minimum value of all or part of the - vector along each axis. - 'reflect' - Pads with the reflection of the vector mirrored on - the first and last values of the vector along each - axis. - 'symmetric' - Pads with the reflection of the vector mirrored - along the edge of the array. 
- 'wrap' - Pads with the wrap of the vector along the axis. - The first values are used to pad the end and the - end values are used to pad the beginning. - stat_length : int, tuple or mapping of hashable to tuple, default: None + mode : {"constant", "edge", "linear_ramp", "maximum", "mean", "median", \ + "minimum", "reflect", "symmetric", "wrap"}, default: "constant" + How to pad the DataArray (taken from numpy docs): + + - "constant": Pads with a constant value. + - "edge": Pads with the edge values of array. + - "linear_ramp": Pads with the linear ramp between end_value and the + array edge value. + - "maximum": Pads with the maximum value of all or part of the + vector along each axis. + - "mean": Pads with the mean value of all or part of the + vector along each axis. + - "median": Pads with the median value of all or part of the + vector along each axis. + - "minimum": Pads with the minimum value of all or part of the + vector along each axis. + - "reflect": Pads with the reflection of the vector mirrored on + the first and last values of the vector along each axis. + - "symmetric": Pads with the reflection of the vector mirrored + along the edge of the array. + - "wrap": Pads with the wrap of the vector along the axis. + The first values are used to pad the end and the + end values are used to pad the beginning. + + stat_length : int, tuple or mapping of Hashable to tuple, default: None Used in 'maximum', 'mean', 'median', and 'minimum'. Number of values at edge of each axis used to calculate the statistic value. {dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)} unique @@ -4254,7 +4409,7 @@ def pad( (stat_length,) or int is a shortcut for before = after = statistic length for all axes. Default is ``None``, to use the entire axis. - constant_values : scalar, tuple or mapping of hashable to tuple, default: 0 + constant_values : scalar, tuple or mapping of Hashable to tuple, default: 0 Used in 'constant'. The values to set the padded values for each axis. ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique @@ -4264,7 +4419,7 @@ def pad( ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for all dimensions. Default is 0. - end_values : scalar, tuple or mapping of hashable to tuple, default: 0 + end_values : scalar, tuple or mapping of Hashable to tuple, default: 0 Used in 'linear_ramp'. The values used for the ending value of the linear_ramp and that will form the edge of the padded array. ``{dim_1: (before_1, after_1), ... dim_N: (before_N, after_N)}`` unique @@ -4274,9 +4429,9 @@ def pad( ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for all axes. Default is 0. - reflect_type : {"even", "odd"}, optional - Used in "reflect", and "symmetric". The "even" style is the - default with an unaltered reflection around the edge value. For + reflect_type : {"even", "odd", None}, optional + Used in "reflect", and "symmetric". The "even" style is the + default with an unaltered reflection around the edge value. For the "odd" style, the extended part of the array is created by subtracting the reflected values from two times the edge value. **pad_width_kwargs @@ -4352,10 +4507,10 @@ def pad( def idxmin( self, - dim: Hashable = None, - skipna: bool = None, + dim: Hashable | None = None, + skipna: bool | None = None, fill_value: Any = dtypes.NA, - keep_attrs: bool = None, + keep_attrs: bool | None = None, ) -> DataArray: """Return the coordinate label of the minimum value along a dimension. 
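The ``mode: PadModeOptions`` annotation introduced for pad above replaces a bare ``str``, so an invalid mode is caught by the type checker rather than failing inside numpy at runtime. A small sketch of how such a Literal alias behaves, using a toy function rather than xarray's pad (the alias mirrors the one added to xarray/core/types.py):

    from typing import Literal

    PadModeOptions = Literal[
        "constant", "edge", "linear_ramp", "maximum", "mean",
        "median", "minimum", "reflect", "symmetric", "wrap",
    ]

    def pad_mode(mode: PadModeOptions = "constant") -> str:
        # A real implementation would forward this to np.pad(..., mode=mode).
        return f"padding with mode={mode!r}"

    pad_mode("reflect")  # accepted
    pad_mode("refelct")  # runs, but mypy flags: not a member of PadModeOptions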
@@ -4382,9 +4537,9 @@ def idxmin( null. By default this is NaN. The fill value and result are automatically converted to a compatible dtype if possible. Ignored if ``skipna`` is False. - keep_attrs : bool, default: False + keep_attrs : bool or None, optional If True, the attributes (``attrs``) will be copied from the - original object to the new one. If False (default), the new object + original object to the new one. If False, the new object will be returned without attributes. Returns @@ -4449,9 +4604,9 @@ def idxmin( def idxmax( self, dim: Hashable = None, - skipna: bool = None, + skipna: bool | None = None, fill_value: Any = dtypes.NA, - keep_attrs: bool = None, + keep_attrs: bool | None = None, ) -> DataArray: """Return the coordinate label of the maximum value along a dimension. @@ -4464,7 +4619,7 @@ def idxmax( Parameters ---------- - dim : hashable, optional + dim : Hashable, optional Dimension over which to apply `idxmax`. This is optional for 1D arrays, but required for arrays with 2 or more dimensions. skipna : bool or None, default: None @@ -4478,9 +4633,9 @@ def idxmax( null. By default this is NaN. The fill value and result are automatically converted to a compatible dtype if possible. Ignored if ``skipna`` is False. - keep_attrs : bool, default: False + keep_attrs : bool or None, optional If True, the attributes (``attrs``) will be copied from the - original object to the new one. If False (default), the new object + original object to the new one. If False, the new object will be returned without attributes. Returns @@ -4542,12 +4697,14 @@ def idxmax( keep_attrs=keep_attrs, ) + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def argmin( self, - dim: Hashable | Sequence[Hashable] = None, - axis: int = None, - keep_attrs: bool = None, - skipna: bool = None, + dim: Hashable | Sequence[Hashable] | None = None, + axis: int | None = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, ) -> DataArray | dict[Hashable, DataArray]: """Index or indices of the minimum of the DataArray over one or more dimensions. @@ -4560,19 +4717,19 @@ def argmin( Parameters ---------- - dim : hashable, sequence of hashable or ..., optional + dim : Hashable, sequence of Hashable, None or ..., optional The dimensions over which to find the minimum. By default, finds minimum over all dimensions - for now returning an int for backward compatibility, but this is deprecated, in future will return a dict with indices for all dimensions; to return a dict with all dimensions now, pass '...'. - axis : int, optional + axis : int or None, optional Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments can be supplied. - keep_attrs : bool, optional + keep_attrs : bool or None, optional If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be + object to the new one. If False, the new object will be returned without attributes. - skipna : bool, optional + skipna : bool or None, optional If True, skip missing values (as marked by NaN). 
By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been @@ -4645,12 +4802,14 @@ def argmin( else: return self._replace_maybe_drop_dims(result) + # change type of self and return to T_DataArray once + # https://github.com/python/mypy/issues/12846 is resolved def argmax( self, dim: Hashable | Sequence[Hashable] = None, - axis: int = None, - keep_attrs: bool = None, - skipna: bool = None, + axis: int | None = None, + keep_attrs: bool | None = None, + skipna: bool | None = None, ) -> DataArray | dict[Hashable, DataArray]: """Index or indices of the maximum of the DataArray over one or more dimensions. @@ -4663,19 +4822,19 @@ def argmax( Parameters ---------- - dim : hashable, sequence of hashable or ..., optional + dim : Hashable, sequence of Hashable, None or ..., optional The dimensions over which to find the maximum. By default, finds maximum over all dimensions - for now returning an int for backward compatibility, but this is deprecated, in future will return a dict with indices for all dimensions; to return a dict with all dimensions now, pass '...'. - axis : int, optional + axis : int or None, optional Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments can be supplied. - keep_attrs : bool, optional + keep_attrs : bool or None, optional If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be + object to the new one. If False, the new object will be returned without attributes. - skipna : bool, optional + skipna : bool or None, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or skipna=True has not been @@ -4750,9 +4909,9 @@ def argmax( def query( self, - queries: Mapping[Any, Any] = None, - parser: str = "pandas", - engine: str = None, + queries: Mapping[Any, Any] | None = None, + parser: QueryParserOptions = "pandas", + engine: QueryEngineOptions = None, missing_dims: ErrorOptionsWithWarn = "raise", **queries_kwargs: Any, ) -> DataArray: @@ -4762,8 +4921,8 @@ def query( Parameters ---------- - queries : dict, optional - A dict with keys matching dimensions and values given by strings + queries : dict-like or None, optional + A dict-like with keys matching dimensions and values given by strings containing Python expressions to be evaluated against the data variables in the dataset. The expressions will be evaluated using the pandas eval() function, and can contain any valid Python expressions but cannot @@ -4775,15 +4934,19 @@ def query( parser to retain strict Python semantics. engine : {"python", "numexpr", None}, default: None The engine used to evaluate the expression. Supported engines are: + - None: tries to use numexpr, falls back to python - "numexpr": evaluates expressions using numexpr - "python": performs operations as if you had eval’d in top level python + missing_dims : {"raise", "warn", "ignore"}, default: "raise" What to do if dimensions that should be selected from are not present in the - Dataset: + DataArray: + - "raise": raise an exception - "warn": raise a warning, and ignore the missing dimensions - "ignore": ignore the missing dimensions + **queries_kwargs : {dim: query, ...}, optional The keyword arguments form of ``queries``. One of queries or queries_kwargs must be provided. 
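Note how QueryEngineOptions = Literal["python", "numexpr", None] makes None an explicit member of the option set rather than wrapping a narrower Literal in Optional; the docstring above spells out what None means. A toy resolver (illustrative only, not xarray code) showing the shape of such an API:

    from typing import Literal

    QueryEngineOptions = Literal["python", "numexpr", None]

    def resolve_engine(engine: QueryEngineOptions = None) -> str:
        # None mimics the documented default: try numexpr, fall back to python.
        if engine is None:
            return "numexpr if available, else python"
        return engine

    resolve_engine("numexpr")  # accepted
    resolve_engine("pandas")   # mypy flags: "pandas" is a parser option, not an engine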
@@ -4827,13 +4990,13 @@ def curvefit( self, coords: str | DataArray | Iterable[str | DataArray], func: Callable[..., Any], - reduce_dims: Hashable | Iterable[Hashable] = None, + reduce_dims: Hashable | Iterable[Hashable] | None = None, skipna: bool = True, - p0: dict[str, Any] = None, - bounds: dict[str, Any] = None, - param_names: Sequence[str] = None, - kwargs: dict[str, Any] = None, - ): + p0: dict[str, Any] | None = None, + bounds: dict[str, Any] | None = None, + param_names: Sequence[str] | None = None, + kwargs: dict[str, Any] | None = None, + ) -> Dataset: """ Curve fitting optimization for arbitrary functions. @@ -4841,7 +5004,7 @@ def curvefit( Parameters ---------- - coords : hashable, DataArray, or sequence of DataArray or hashable + coords : Hashable, DataArray, or sequence of DataArray or Hashable Independent coordinate(s) over which to perform the curve fitting. Must share at least one dimension with the calling object. When fitting multi-dimensional functions, supply `coords` as a sequence in the same order as arguments in @@ -4852,22 +5015,22 @@ def curvefit( array of length `len(x)`. `params` are the fittable parameters which are optimized by scipy curve_fit. `x` can also be specified as a sequence containing multiple coordinates, e.g. `f((x0, x1), *params)`. - reduce_dims : hashable or sequence of hashable + reduce_dims : Hashable or sequence of Hashable Additional dimension(s) over which to aggregate while fitting. For example, calling `ds.curvefit(coords='time', reduce_dims=['lat', 'lon'], ...)` will aggregate all lat and lon points and fit the specified function along the time dimension. - skipna : bool, optional + skipna : bool, default: True Whether to skip missing values when fitting. Default is True. - p0 : dict-like, optional + p0 : dict-like or None, optional Optional dictionary of parameter names to initial guesses passed to the `curve_fit` `p0` arg. If none or only some parameters are passed, the rest will be assigned initial values following the default scipy behavior. - bounds : dict-like, optional + bounds : dict-like or None, optional Optional dictionary of parameter names to bounding values passed to the `curve_fit` `bounds` arg. If none or only some parameters are passed, the rest will be unbounded following the default scipy behavior. - param_names : sequence of hashable, optional + param_names : sequence of Hashable or None, optional Sequence of names for the fittable parameters of `func`. If not supplied, this will be automatically determined by arguments of `func`. `param_names` should be manually supplied when fitting a function that takes a variable @@ -4902,10 +5065,10 @@ def curvefit( ) def drop_duplicates( - self, + self: T_DataArray, dim: Hashable | Iterable[Hashable], - keep: Literal["first", "last"] | Literal[False] = "first", - ): + keep: Literal["first", "last", False] = "first", + ) -> T_DataArray: """Returns a new DataArray with duplicate dimension values removed. Parameters @@ -4914,6 +5077,7 @@ def drop_duplicates( Pass `...` to drop duplicates along all dimensions. keep : {"first", "last", False}, default: "first" Determines which duplicates (if any) to keep. + - ``"first"`` : Drop duplicates except for the first occurrence. - ``"last"`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. 
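The ``keep: Literal["first", "last", False]`` annotation on drop_duplicates above shows that a Literal may mix value types: the boolean False is itself one of the allowed literals, alongside two strings. A toy sketch of dispatching on such a parameter (illustrative only, not xarray code):

    from typing import Literal

    def keep_policy(keep: Literal["first", "last", False] = "first") -> str:
        if keep is False:
            return "drop every duplicated value"
        return f"keep the {keep} occurrence of each value"

    print(keep_policy("last"))  # keep the last occurrence of each value
    print(keep_policy(False))   # drop every duplicated value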
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e559a8551b6..c5c727f4bed 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -110,6 +110,11 @@ ErrorOptions, ErrorOptionsWithWarn, JoinOptions, + PadModeOptions, + PadReflectOptions, + QueryEngineOptions, + QueryParserOptions, + T_Dataset, T_Xarray, ) @@ -2641,8 +2646,8 @@ def thin( return self.isel(indexers_slices) def broadcast_like( - self, other: Dataset | DataArray, exclude: Iterable[Hashable] = None - ) -> Dataset: + self: T_Dataset, other: Dataset | DataArray, exclude: Iterable[Hashable] = None + ) -> T_Dataset: """Broadcast this DataArray against another Dataset or DataArray. This is equivalent to xr.broadcast(other, self)[1] @@ -2662,7 +2667,9 @@ def broadcast_like( dims_map, common_coords = _get_broadcast_dims_map_common_coords(args, exclude) - return _broadcast_helper(args[1], exclude, dims_map, common_coords) + return _broadcast_helper( + cast("T_Dataset", args[1]), exclude, dims_map, common_coords + ) def _reindex_callback( self, @@ -3667,9 +3674,9 @@ def expand_dims( and the values are either integers (giving the length of the new dimensions) or array-like (giving the coordinates of the new dimensions). - axis : int, sequence of int, or None + axis : int, sequence of int, or None, default: None Axis position(s) where new axis is to be inserted (position(s) on - the result array). If a list (or tuple) of integers is passed, + the result array). If a sequence of integers is passed, multiple axes are inserted. In this case, dim arguments should be same length list. If axis=None is passed, all the axes will be inserted to the start of the result array. @@ -3681,8 +3688,8 @@ def expand_dims( Returns ------- - expanded : same type as caller - This object, but with an additional dimension(s). + expanded : Dataset + This object, but with additional dimension(s). """ if dim is None: pass @@ -3998,18 +4005,18 @@ def reset_index( def reorder_levels( self, - dim_order: Mapping[Any, Sequence[int]] = None, - **dim_order_kwargs: Sequence[int], + dim_order: Mapping[Any, Sequence[int | Hashable]] = None, + **dim_order_kwargs: Sequence[int | Hashable], ) -> Dataset: """Rearrange index levels using input order. Parameters ---------- - dim_order : optional + dim_order : dict-like of Hashable to Sequence of int or Hashable, optional Mapping from names matching dimensions and values given by lists representing new level orders. Every given dimension must have a multi-index. - **dim_order_kwargs : optional + **dim_order_kwargs : Sequence of int or Hashable, optional The keyword arguments form of ``dim_order``. One of dim_order or dim_order_kwargs must be provided. @@ -4174,8 +4181,8 @@ def stack( ellipsis (`...`) will be replaced by all unlisted dimensions. Passing a list containing an ellipsis (`stacked_dim=[...]`) will stack over all dimensions. - create_index : bool, optional - If True (default), create a multi-index for each of the stacked dimensions. + create_index : bool or None, default: True + If True, create a multi-index for each of the stacked dimensions. If False, don't create any index. If None, create a multi-index only if exactly one single (1-d) coordinate index is found for every dimension to stack. 
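The cast("T_Dataset", args[1]) passed to _broadcast_helper in broadcast_like above narrows a result that is annotated with a wider union back to the caller's TypeVar, with no runtime effect. A minimal sketch of the technique under illustrative names (not xarray code):

    from typing import TypeVar, Union, cast

    A = TypeVar("A", bound=Union[int, str])  # illustrative, like T_Dataset

    def _widened(x: Union[int, str]) -> Union[int, str]:
        return x

    def narrow(x: A) -> A:
        # cast() is a static-only assertion: it tells mypy the union result
        # really has the caller's type A, and does nothing at runtime.
        return cast(A, _widened(x))

    n: int = narrow(3)     # checked as int, not Union[int, str]
    s: str = narrow("ok")  # checked as str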
@@ -5637,7 +5644,7 @@ def to_array(self, dim="variable", name=None): return DataArray._construct_direct(variable, coords, name, indexes) def _normalize_dim_order( - self, dim_order: list[Hashable] = None + self, dim_order: Sequence[Hashable] | None = None ) -> dict[Hashable, int]: """ Check the validity of the provided dimensions if any and return the mapping @@ -5645,7 +5652,7 @@ def _normalize_dim_order( Parameters ---------- - dim_order + dim_order: Sequence of Hashable or None, optional Dimension order to validate (default to the alphabetical order if None). Returns @@ -5718,7 +5725,7 @@ def to_dataframe(self, dim_order: list[Hashable] = None) -> pd.DataFrame: Returns ------- - result + result : DataFrame Dataset as a pandas DataFrame. """ @@ -6723,7 +6730,7 @@ def rank(self, dim, pct=False, keep_attrs=None): attrs = self.attrs if keep_attrs else None return self._replace(variables, coord_names, attrs=attrs) - def differentiate(self, coord, edge_order=1, datetime_unit=None): + def differentiate(self, coord, edge_order: Literal[1, 2] = 1, datetime_unit=None): """ Differentiate with the second order accurate central differences. @@ -7206,11 +7213,11 @@ def polyfit( self, dim: Hashable, deg: int, - skipna: bool = None, - rcond: float = None, + skipna: bool | None = None, + rcond: float | None = None, w: Hashable | Any = None, full: bool = False, - cov: bool | str = False, + cov: bool | Literal["unscaled"] = False, ): """ Least squares polynomial fit. @@ -7224,19 +7231,19 @@ def polyfit( Coordinate along which to fit the polynomials. deg : int Degree of the fitting polynomial. - skipna : bool, optional + skipna : bool or None, optional If True, removes all invalid values before fitting each 1D slices of the array. Default is True if data is stored in a dask.array or if there is any invalid values, False otherwise. - rcond : float, optional + rcond : float or None, optional Relative condition number to the fit. w : hashable or Any, optional Weights to apply to the y-coordinate of the sample points. Can be an array-like object or the name of a coordinate in the dataset. - full : bool, optional + full : bool, default: False Whether to return the residuals, matrix rank and singular values in addition to the coefficients. - cov : bool or str, optional + cov : bool or "unscaled", default: False Whether to return to the covariance matrix in addition to the coefficients. The matrix is not scaled if `cov='unscaled'`. @@ -7401,16 +7408,16 @@ def polyfit( def pad( self, pad_width: Mapping[Any, int | tuple[int, int]] = None, - mode: str = "constant", + mode: PadModeOptions = "constant", stat_length: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, constant_values: ( - int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None + float | tuple[float, float] | Mapping[Any, tuple[float, float]] | None ) = None, end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, - reflect_type: str = None, + reflect_type: PadReflectOptions = None, **pad_width_kwargs: Any, ) -> Dataset: """Pad this dataset along one or more dimensions. @@ -7429,26 +7436,27 @@ def pad( Mapping with the form of {dim: (pad_before, pad_after)} describing the number of values padded along each dimension. {dim: pad} is a shortcut for pad_before = pad_after = pad - mode : str, default: "constant" - One of the following string values (taken from numpy docs). 
+ mode : {"constant", "edge", "linear_ramp", "maximum", "mean", "median", \ + "minimum", "reflect", "symmetric", "wrap"}, default: "constant" + How to pad the DataArray (taken from numpy docs): - - constant: Pads with a constant value. - - edge: Pads with the edge values of array. - - linear_ramp: Pads with the linear ramp between end_value and the + - "constant": Pads with a constant value. + - "edge": Pads with the edge values of array. + - "linear_ramp": Pads with the linear ramp between end_value and the array edge value. - - maximum: Pads with the maximum value of all or part of the + - "maximum": Pads with the maximum value of all or part of the vector along each axis. - - mean: Pads with the mean value of all or part of the + - "mean": Pads with the mean value of all or part of the vector along each axis. - - median: Pads with the median value of all or part of the + - "median": Pads with the median value of all or part of the vector along each axis. - - minimum: Pads with the minimum value of all or part of the + - "minimum": Pads with the minimum value of all or part of the vector along each axis. - - reflect: Pads with the reflection of the vector mirrored on + - "reflect": Pads with the reflection of the vector mirrored on the first and last values of the vector along each axis. - - symmetric: Pads with the reflection of the vector mirrored + - "symmetric": Pads with the reflection of the vector mirrored along the edge of the array. - - wrap: Pads with the wrap of the vector along the axis. + - "wrap": Pads with the wrap of the vector along the axis. The first values are used to pad the end and the end values are used to pad the beginning. @@ -7482,7 +7490,7 @@ def pad( ``(constant,)`` or ``constant`` is a shortcut for ``before = after = constant`` for all axes. Default is 0. - reflect_type : {"even", "odd"}, optional + reflect_type : {"even", "odd", None}, optional Used in "reflect", and "symmetric". The "even" style is the default with an unaltered reflection around the edge value. For the "odd" style, the extended part of the array is created by @@ -7883,9 +7891,9 @@ def argmax(self, dim=None, **kwargs): def query( self, - queries: Mapping[Any, Any] = None, - parser: str = "pandas", - engine: str = None, + queries: Mapping[Any, Any] | None = None, + parser: QueryParserOptions = "pandas", + engine: QueryEngineOptions = None, missing_dims: ErrorOptionsWithWarn = "raise", **queries_kwargs: Any, ) -> Dataset: @@ -7896,8 +7904,8 @@ def query( Parameters ---------- - queries : dict, optional - A dict with keys matching dimensions and values given by strings + queries : dict-like, optional + A dict-like with keys matching dimensions and values given by strings containing Python expressions to be evaluated against the data variables in the dataset. The expressions will be evaluated using the pandas eval() function, and can contain any valid Python expressions but cannot diff --git a/xarray/core/types.py b/xarray/core/types.py index a2f268b983a..38d13c07c3c 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -24,6 +24,8 @@ T_Variable = TypeVar("T_Variable", bound="Variable") T_Index = TypeVar("T_Index", bound="Index") +T_DataArrayOrSet = TypeVar("T_DataArrayOrSet", bound=Union["Dataset", "DataArray"]) + # Maybe we rename this to T_Data or something less Fortran-y? 
T_Xarray = TypeVar("T_Xarray", "DataArray", "Dataset") T_DataWithCoords = TypeVar("T_DataWithCoords", bound="DataWithCoords") @@ -36,6 +38,7 @@ ErrorOptions = Literal["raise", "ignore"] ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"] + CompatOptions = Literal[ "identical", "equals", "broadcast_equals", "no_conflicts", "override", "minimal" ] @@ -46,6 +49,35 @@ ] JoinOptions = Literal["outer", "inner", "left", "right", "exact", "override"] +InterpOptions = Literal["linear", "nearest", "zero", "slinear", "quadratic", "cubic"] +Interp1dOptions = Union[InterpOptions, Literal["polynomial"]] +InterpAllOptions = Union[ + Interp1dOptions, Literal["barycentric", "krog", "pchip", "spline", "akima"] +] + +DatetimeUnitOptions = Literal[ + "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as" +] + +QueryEngineOptions = Literal["python", "numexpr", None] +QueryParserOptions = Literal["pandas", "python"] + +ReindexMethodOptions = Literal["nearest", "pad", "ffill", "backfill", "bfill", None] + +PadModeOptions = Literal[ + "constant", + "edge", + "linear_ramp", + "maximum", + "mean", + "median", + "minimum", + "reflect", + "symmetric", + "wrap", +] +PadReflectOptions = Literal["even", "odd", None] + CFCalendar = Literal[ "standard", "gregorian", diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c34041abb2a..798e5a9f43e 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -59,7 +59,12 @@ BASIC_INDEXING_TYPES = integer_types + (slice,) if TYPE_CHECKING: - from .types import ErrorOptionsWithWarn, T_Variable + from .types import ( + ErrorOptionsWithWarn, + PadModeOptions, + PadReflectOptions, + T_Variable, + ) class MissingDimensionsError(ValueError): @@ -1309,15 +1314,17 @@ def _pad_options_dim_to_index( def pad( self, pad_width: Mapping[Any, int | tuple[int, int]] | None = None, - mode: str = "constant", + mode: PadModeOptions = "constant", stat_length: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, - constant_values: (int | tuple[int, int] | Mapping[Any, tuple[int, int]]) + constant_values: float + | tuple[float, float] + | Mapping[Any, tuple[float, float]] | None = None, end_values: int | tuple[int, int] | Mapping[Any, tuple[int, int]] | None = None, - reflect_type: str | None = None, + reflect_type: PadReflectOptions = None, **pad_width_kwargs: Any, ): """ @@ -1384,7 +1391,7 @@ def pad( pad_width_by_index = self._pad_options_dim_to_index(pad_width) # create pad_options_kwargs, numpy/dask requires only relevant kwargs to be nonempty - pad_option_kwargs = {} + pad_option_kwargs: dict[str, Any] = {} if stat_length is not None: pad_option_kwargs["stat_length"] = stat_length if constant_values is not None: @@ -1392,7 +1399,7 @@ def pad( if end_values is not None: pad_option_kwargs["end_values"] = end_values if reflect_type is not None: - pad_option_kwargs["reflect_type"] = reflect_type # type: ignore[assignment] + pad_option_kwargs["reflect_type"] = reflect_type array = np.pad( # type: ignore[call-overload] self.data.astype(dtype, copy=False), @@ -1452,14 +1459,14 @@ def roll(self, shifts=None, **shifts_kwargs): def transpose( self, - *dims, + *dims: Hashable, missing_dims: ErrorOptionsWithWarn = "raise", ) -> Variable: """Return a new Variable object with transposed dimensions. Parameters ---------- - *dims : str, optional + *dims : Hashable, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. 
missing_dims : {"raise", "warn", "ignore"}, default: "raise" diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 65f0bc08261..ff477a40891 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -1,7 +1,10 @@ +from __future__ import annotations + import importlib import platform import warnings from contextlib import contextmanager, nullcontext +from typing import Any from unittest import mock # noqa: F401 import numpy as np @@ -40,7 +43,7 @@ ) -def _importorskip(modname, minversion=None): +def _importorskip(modname: str, minversion: str | None = None) -> tuple[bool, Any]: try: mod = importlib.import_module(modname) has = True diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 970e2a8e710..e488f5afad9 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -3,6 +3,7 @@ import warnings from copy import deepcopy from textwrap import dedent +from typing import Any, Final, Hashable, cast import numpy as np import pandas as pd @@ -25,6 +26,7 @@ from xarray.core import dtypes from xarray.core.common import full_like from xarray.core.indexes import Index, PandasIndex, filter_indexes_from_coords +from xarray.core.types import QueryEngineOptions, QueryParserOptions from xarray.core.utils import is_scalar from xarray.tests import ( ReturnItem, @@ -69,7 +71,7 @@ def setup(self): ) self.mda = DataArray([0, 1, 2, 3], coords={"x": self.mindex}, dims="x") - def test_repr(self): + def test_repr(self) -> None: v = Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"}) coords = {"x": np.arange(3, dtype=np.int64), "other": np.int64(0)} data_array = DataArray(v, coords, name="my_variable") @@ -87,7 +89,7 @@ def test_repr(self): ) assert expected == repr(data_array) - def test_repr_multiindex(self): + def test_repr_multiindex(self) -> None: expected = dedent( """\ @@ -99,7 +101,7 @@ def test_repr_multiindex(self): ) assert expected == repr(self.mda) - def test_repr_multiindex_long(self): + def test_repr_multiindex_long(self) -> None: mindex_long = pd.MultiIndex.from_product( [["a", "b", "c", "d"], [1, 2, 3, 4, 5, 6, 7, 8]], names=("level_1", "level_2"), @@ -117,7 +119,7 @@ def test_repr_multiindex_long(self): ) assert expected == repr(mda_long) - def test_properties(self): + def test_properties(self) -> None: assert_equal(self.dv.variable, self.v) assert_array_equal(self.dv.values, self.v.values) for attr in ["dims", "dtype", "shape", "size", "nbytes", "ndim", "attrs"]: @@ -135,7 +137,7 @@ def test_properties(self): with pytest.raises(AttributeError): self.dv.variable = self.v - def test_data_property(self): + def test_data_property(self) -> None: array = DataArray(np.zeros((3, 4))) actual = array.copy() actual.values = np.ones((3, 4)) @@ -144,7 +146,7 @@ def test_data_property(self): assert_array_equal(2 * np.ones((3, 4)), actual.data) assert_array_equal(actual.data, actual.values) - def test_indexes(self): + def test_indexes(self) -> None: array = DataArray(np.zeros((2, 3)), [("x", [0, 1]), ("y", ["a", "b", "c"])]) expected_indexes = {"x": pd.Index([0, 1]), "y": pd.Index(["a", "b", "c"])} expected_xindexes = { @@ -158,21 +160,21 @@ def test_indexes(self): assert array.xindexes[k].equals(expected_xindexes[k]) assert array.indexes[k].equals(expected_indexes[k]) - def test_get_index(self): + def test_get_index(self) -> None: array = DataArray(np.zeros((2, 3)), coords={"x": ["a", "b"]}, dims=["x", "y"]) assert array.get_index("x").equals(pd.Index(["a", "b"])) assert array.get_index("y").equals(pd.Index([0, 1, 
2])) with pytest.raises(KeyError): array.get_index("z") - def test_get_index_size_zero(self): + def test_get_index_size_zero(self) -> None: array = DataArray(np.zeros((0,)), dims=["x"]) actual = array.get_index("x") expected = pd.Index([], dtype=np.int64) assert actual.equals(expected) assert actual.dtype == expected.dtype - def test_struct_array_dims(self): + def test_struct_array_dims(self) -> None: """ This test checks subtraction of two DataArrays for the case when dimension is a structured array. @@ -230,7 +232,7 @@ def test_struct_array_dims(self): assert_identical(actual, expected) - def test_name(self): + def test_name(self) -> None: arr = self.dv assert arr.name == "foo" @@ -244,33 +246,33 @@ def test_name(self): expected = DataArray([3], [("x", [3])], name="y") assert_identical(actual, expected) - def test_dims(self): + def test_dims(self) -> None: arr = self.dv assert arr.dims == ("x", "y") with pytest.raises(AttributeError, match=r"you cannot assign"): arr.dims = ("w", "z") - def test_sizes(self): + def test_sizes(self) -> None: array = DataArray(np.zeros((3, 4)), dims=["x", "y"]) assert array.sizes == {"x": 3, "y": 4} assert tuple(array.sizes) == array.dims with pytest.raises(TypeError): - array.sizes["foo"] = 5 + array.sizes["foo"] = 5 # type: ignore - def test_encoding(self): + def test_encoding(self) -> None: expected = {"foo": "bar"} self.dv.encoding["foo"] = "bar" assert expected == self.dv.encoding - expected = {"baz": 0} - self.dv.encoding = expected + expected2 = {"baz": 0} + self.dv.encoding = expected2 + assert expected2 is not self.dv.encoding - assert expected is not self.dv.encoding - - def test_constructor(self): + def test_constructor(self) -> None: data = np.random.random((2, 3)) + # w/o coords, w/o dims actual = DataArray(data) expected = Dataset({None: (["dim_0", "dim_1"], data)})[None] assert_identical(expected, actual) @@ -285,6 +287,7 @@ def test_constructor(self): )[None] assert_identical(expected, actual) + # pd.Index coords, w/o dims actual = DataArray( data, [pd.Index(["a", "b"], name="x"), pd.Index([-1, -2, -3], name="y")] ) @@ -293,54 +296,66 @@ def test_constructor(self): )[None] assert_identical(expected, actual) - coords = [["a", "b"], [-1, -2, -3]] - actual = DataArray(data, coords, ["x", "y"]) + # list coords, w dims + coords1 = [["a", "b"], [-1, -2, -3]] + actual = DataArray(data, coords1, ["x", "y"]) assert_identical(expected, actual) - coords = [pd.Index(["a", "b"], name="A"), pd.Index([-1, -2, -3], name="B")] - actual = DataArray(data, coords, ["x", "y"]) + # pd.Index coords, w dims + coords2 = [pd.Index(["a", "b"], name="A"), pd.Index([-1, -2, -3], name="B")] + actual = DataArray(data, coords2, ["x", "y"]) assert_identical(expected, actual) - coords = {"x": ["a", "b"], "y": [-1, -2, -3]} - actual = DataArray(data, coords, ["x", "y"]) + # dict coords, w dims + coords3 = {"x": ["a", "b"], "y": [-1, -2, -3]} + actual = DataArray(data, coords3, ["x", "y"]) assert_identical(expected, actual) - actual = DataArray(data, coords) + # dict coords, w/o dims + actual = DataArray(data, coords3) assert_identical(expected, actual) - coords = [("x", ["a", "b"]), ("y", [-1, -2, -3])] - actual = DataArray(data, coords) + # tuple[dim, list] coords, w/o dims + coords4 = [("x", ["a", "b"]), ("y", [-1, -2, -3])] + actual = DataArray(data, coords4) assert_identical(expected, actual) + # partial dict coords, w dims expected = Dataset({None: (["x", "y"], data), "x": ("x", ["a", "b"])})[None] actual = DataArray(data, {"x": ["a", "b"]}, ["x", "y"]) 
assert_identical(expected, actual) + # w/o coords, w dims actual = DataArray(data, dims=["x", "y"]) expected = Dataset({None: (["x", "y"], data)})[None] assert_identical(expected, actual) + # w/o coords, w dims, w name actual = DataArray(data, dims=["x", "y"], name="foo") expected = Dataset({"foo": (["x", "y"], data)})["foo"] assert_identical(expected, actual) + # w/o coords, w/o dims, w name actual = DataArray(data, name="foo") expected = Dataset({"foo": (["dim_0", "dim_1"], data)})["foo"] assert_identical(expected, actual) + # w/o coords, w dims, w attrs actual = DataArray(data, dims=["x", "y"], attrs={"bar": 2}) expected = Dataset({None: (["x", "y"], data, {"bar": 2})})[None] assert_identical(expected, actual) + # w/o coords, w dims (ds has attrs) actual = DataArray(data, dims=["x", "y"]) expected = Dataset({None: (["x", "y"], data, {}, {"bar": 2})})[None] assert_identical(expected, actual) + # data is list, w coords actual = DataArray([1, 2, 3], coords={"x": [0, 1, 2]}) expected = DataArray([1, 2, 3], coords=[("x", [0, 1, 2])]) assert_identical(expected, actual) - def test_constructor_invalid(self): + def test_constructor_invalid(self) -> None: data = np.random.randn(3, 2) with pytest.raises(ValueError, match=r"coords is not dict-like"): @@ -367,7 +382,7 @@ def test_constructor_invalid(self): with pytest.raises(ValueError, match=r"matching the dimension size"): DataArray(data, coords={"x": 0}, dims=["x", "y"]) - def test_constructor_from_self_described(self): + def test_constructor_from_self_described(self) -> None: data = [[-0.1, 21], [0, 2]] expected = DataArray( data, @@ -413,7 +428,7 @@ def test_constructor_from_self_described(self): assert_identical(expected, actual) @requires_dask - def test_constructor_from_self_described_chunked(self): + def test_constructor_from_self_described_chunked(self) -> None: expected = DataArray( [[-0.1, 21], [0, 2]], coords={"x": ["a", "b"], "y": [-1, -2]}, @@ -425,13 +440,13 @@ def test_constructor_from_self_described_chunked(self): assert_identical(expected, actual) assert_chunks_equal(expected, actual) - def test_constructor_from_0d(self): + def test_constructor_from_0d(self) -> None: expected = Dataset({None: ([], 0)})[None] actual = DataArray(0) assert_identical(expected, actual) @requires_dask - def test_constructor_dask_coords(self): + def test_constructor_dask_coords(self) -> None: # regression test for GH1684 import dask.array as da @@ -443,7 +458,7 @@ def test_constructor_dask_coords(self): expected = DataArray(data, coords={"x": ecoord, "y": ecoord}, dims=["x", "y"]) assert_equal(actual, expected) - def test_equals_and_identical(self): + def test_equals_and_identical(self) -> None: orig = DataArray(np.arange(5.0), {"a": 42}, dims="x") expected = orig @@ -488,13 +503,13 @@ def test_equals_and_identical(self): assert not expected.equals(actual) assert not expected.identical(actual) - def test_equals_failures(self): + def test_equals_failures(self) -> None: orig = DataArray(np.arange(5.0), {"a": 42}, dims="x") - assert not orig.equals(np.arange(5)) - assert not orig.identical(123) - assert not orig.broadcast_equals({1: 2}) + assert not orig.equals(np.arange(5)) # type: ignore + assert not orig.identical(123) # type: ignore + assert not orig.broadcast_equals({1: 2}) # type: ignore - def test_broadcast_equals(self): + def test_broadcast_equals(self) -> None: a = DataArray([0, 0], {"y": 0}, dims="x") b = DataArray([0, 0], {"y": ("x", [0, 0])}, dims="x") assert a.broadcast_equals(b) @@ -506,7 +521,7 @@ def test_broadcast_equals(self): assert not 
a.broadcast_equals(c) assert not c.broadcast_equals(a) - def test_getitem(self): + def test_getitem(self) -> None: # strings pull out dataarrays assert_identical(self.dv, self.ds["foo"]) x = self.dv["x"] @@ -543,12 +558,12 @@ def test_getitem(self): ]: assert_array_equal(self.v[i], self.dv[i]) - def test_getitem_dict(self): + def test_getitem_dict(self) -> None: actual = self.dv[{"x": slice(3), "y": 0}] expected = self.dv.isel(x=slice(3), y=0) assert_identical(expected, actual) - def test_getitem_coords(self): + def test_getitem_coords(self) -> None: orig = DataArray( [[10], [20]], { @@ -604,7 +619,7 @@ def test_getitem_coords(self): ) assert_identical(expected, actual) - def test_getitem_dataarray(self): + def test_getitem_dataarray(self) -> None: # It should not conflict da = DataArray(np.arange(12).reshape((3, 4)), dims=["x", "y"]) ind = DataArray([[0, 1], [0, 1]], dims=["x", "z"]) @@ -628,7 +643,7 @@ def test_getitem_dataarray(self): assert_equal(da[ind], da[[0, 1]]) assert_equal(da[ind], da[ind.values]) - def test_getitem_empty_index(self): + def test_getitem_empty_index(self) -> None: da = DataArray(np.arange(12).reshape((3, 4)), dims=["x", "y"]) assert_identical(da[{"x": []}], DataArray(np.zeros((0, 4)), dims=["x", "y"])) assert_identical( @@ -636,7 +651,7 @@ def test_getitem_empty_index(self): ) assert_identical(da[[]], DataArray(np.zeros((0, 4)), dims=["x", "y"])) - def test_setitem(self): + def test_setitem(self) -> None: # basic indexing should work as numpy's indexing tuples = [ (0, 0), @@ -663,7 +678,7 @@ def test_setitem(self): expected[t] = 1 assert_array_equal(orig.values, expected) - def test_setitem_fancy(self): + def test_setitem_fancy(self) -> None: # vectorized indexing da = DataArray(np.ones((3, 2)), dims=["x", "y"]) ind = Variable(["a"], [0, 1]) @@ -693,7 +708,7 @@ def test_setitem_fancy(self): expected = DataArray([[0, 0], [0, 0], [1, 1]], dims=["x", "y"]) assert_identical(expected, da) - def test_setitem_dataarray(self): + def test_setitem_dataarray(self) -> None: def get_data(): return DataArray( np.ones((4, 3, 2)), @@ -764,18 +779,18 @@ def get_data(): ) da[dict(x=ind)] = value # should not raise - def test_contains(self): + def test_contains(self) -> None: data_array = DataArray([1, 2]) assert 1 in data_array assert 3 not in data_array - def test_pickle(self): + def test_pickle(self) -> None: data = DataArray(np.random.random((3, 3)), dims=("id", "time")) roundtripped = pickle.loads(pickle.dumps(data)) assert_identical(data, roundtripped) @requires_dask - def test_chunk(self): + def test_chunk(self) -> None: unblocked = DataArray(np.ones((3, 4))) assert unblocked.chunks is None @@ -809,7 +824,7 @@ def test_chunk(self): assert blocked.chunks == ((3,), (3, 1)) assert blocked.data.name != first_dask_name - def test_isel(self): + def test_isel(self) -> None: assert_identical(self.dv[0], self.dv.isel(x=0)) assert_identical(self.dv, self.dv.isel(x=slice(None))) assert_identical(self.dv[:3], self.dv.isel(x=slice(3))) @@ -828,7 +843,7 @@ def test_isel(self): self.dv.isel(not_a_dim=0, missing_dims="warn") assert_identical(self.dv, self.dv.isel(not_a_dim=0, missing_dims="ignore")) - def test_isel_types(self): + def test_isel_types(self) -> None: # regression test for #1405 da = DataArray([1, 2, 3], dims="x") # uint64 @@ -845,7 +860,7 @@ def test_isel_types(self): ) @pytest.mark.filterwarnings("ignore::DeprecationWarning") - def test_isel_fancy(self): + def test_isel_fancy(self) -> None: shape = (10, 7, 6) np_array = np.random.random(shape) da = DataArray( @@ -876,9 +891,9 
@@ def test_isel_fancy(self): da.isel(time=(("points",), [1, 2])) y = [-1, 0] x = [-2, 2] - expected = da.values[:, y, x] - actual = da.isel(x=(("points",), x), y=(("points",), y)).values - np.testing.assert_equal(actual, expected) + expected2 = da.values[:, y, x] + actual2 = da.isel(x=(("points",), x), y=(("points",), y)).values + np.testing.assert_equal(actual2, expected2) # test that the order of the indexers doesn't matter assert_identical( @@ -896,10 +911,10 @@ def test_isel_fancy(self): stations["dim1s"] = (("station",), [1, 2, 3]) stations["dim2s"] = (("station",), [4, 5, 1]) - actual = da.isel(x=stations["dim1s"], y=stations["dim2s"]) - assert "station" in actual.coords - assert "station" in actual.dims - assert_identical(actual["station"], stations["station"]) + actual3 = da.isel(x=stations["dim1s"], y=stations["dim2s"]) + assert "station" in actual3.coords + assert "station" in actual3.dims + assert_identical(actual3["station"], stations["station"]) with pytest.raises(ValueError, match=r"conflicting values/indexes on "): da.isel( @@ -914,19 +929,19 @@ def test_isel_fancy(self): stations["dim1s"] = (("a", "b"), [[1, 2], [2, 3], [3, 4]]) stations["dim2s"] = (("a",), [4, 5, 1]) - actual = da.isel(x=stations["dim1s"], y=stations["dim2s"]) - assert "a" in actual.coords - assert "a" in actual.dims - assert "b" in actual.coords - assert "b" in actual.dims - assert_identical(actual["a"], stations["a"]) - assert_identical(actual["b"], stations["b"]) - expected = da.variable[ + actual4 = da.isel(x=stations["dim1s"], y=stations["dim2s"]) + assert "a" in actual4.coords + assert "a" in actual4.dims + assert "b" in actual4.coords + assert "b" in actual4.dims + assert_identical(actual4["a"], stations["a"]) + assert_identical(actual4["b"], stations["b"]) + expected4 = da.variable[ :, stations["dim2s"].variable, stations["dim1s"].variable ] - assert_array_equal(actual, expected) + assert_array_equal(actual4, expected4) - def test_sel(self): + def test_sel(self) -> None: self.ds["x"] = ("x", np.array(list("abcdefghij"))) da = self.ds["foo"] assert_identical(da, da.sel(x=slice(None))) @@ -939,7 +954,7 @@ def test_sel(self): assert_identical(da[1], da.sel(x=b)) assert_identical(da[[1]], da.sel(x=slice(b, b))) - def test_sel_dataarray(self): + def test_sel_dataarray(self) -> None: # indexing with DataArray self.ds["x"] = ("x", np.array(list("abcdefghij"))) da = self.ds["foo"] @@ -964,12 +979,12 @@ def test_sel_dataarray(self): assert "new_dim" in actual.coords assert_equal(actual["new_dim"].drop_vars("x"), ind["new_dim"]) - def test_sel_invalid_slice(self): + def test_sel_invalid_slice(self) -> None: array = DataArray(np.arange(10), [("x", np.arange(10))]) with pytest.raises(ValueError, match=r"cannot use non-scalar arrays"): array.sel(x=slice(array.x)) - def test_sel_dataarray_datetime_slice(self): + def test_sel_dataarray_datetime_slice(self) -> None: # regression test for GH1240 times = pd.date_range("2000-01-01", freq="D", periods=365) array = DataArray(np.arange(365), [("time", times)]) @@ -980,7 +995,7 @@ def test_sel_dataarray_datetime_slice(self): result = array.sel(delta=slice(array.delta[0], array.delta[-1])) assert_equal(result, array) - def test_sel_float(self): + def test_sel_float(self) -> None: data_values = np.arange(4) # case coords are float32 and label is list of floats @@ -1007,7 +1022,7 @@ def test_sel_float(self): assert_equal(expected_scalar, actual_scalar) assert_equal(expected_16, actual_16) - def test_sel_float_multiindex(self): + def test_sel_float_multiindex(self) -> None: 
# regression test https://github.com/pydata/xarray/issues/5691 # test multi-index created from coordinates, one with dtype=float32 lvl1 = ["a", "a", "b", "b"] @@ -1022,14 +1037,14 @@ def test_sel_float_multiindex(self): assert_equal(actual, expected) - def test_sel_no_index(self): + def test_sel_no_index(self) -> None: array = DataArray(np.arange(10), dims="x") assert_identical(array[0], array.sel(x=0)) assert_identical(array[:5], array.sel(x=slice(5))) assert_identical(array[[0, -1]], array.sel(x=[0, -1])) assert_identical(array[array < 5], array.sel(x=(array < 5))) - def test_sel_method(self): + def test_sel_method(self) -> None: data = DataArray(np.random.randn(3, 4), [("x", [0, 1, 2]), ("y", list("abcd"))]) expected = data.sel(y=["a", "b"]) @@ -1040,7 +1055,7 @@ def test_sel_method(self): actual = data.sel(x=[0.9, 1.9], method="backfill", tolerance=1) assert_identical(expected, actual) - def test_sel_drop(self): + def test_sel_drop(self) -> None: data = DataArray([1, 2, 3], [("x", [0, 1, 2])]) expected = DataArray(1) selected = data.sel(x=0, drop=True) @@ -1055,7 +1070,7 @@ def test_sel_drop(self): selected = data.sel(x=0, drop=True) assert_identical(expected, selected) - def test_isel_drop(self): + def test_isel_drop(self) -> None: data = DataArray([1, 2, 3], [("x", [0, 1, 2])]) expected = DataArray(1) selected = data.isel(x=0, drop=True) @@ -1065,7 +1080,7 @@ def test_isel_drop(self): selected = data.isel(x=0, drop=False) assert_identical(expected, selected) - def test_head(self): + def test_head(self) -> None: assert_equal(self.dv.isel(x=slice(5)), self.dv.head(x=5)) assert_equal(self.dv.isel(x=slice(0)), self.dv.head(x=0)) assert_equal( @@ -1081,7 +1096,7 @@ def test_head(self): with pytest.raises(ValueError, match=r"expected positive int"): self.dv.head(-3) - def test_tail(self): + def test_tail(self) -> None: assert_equal(self.dv.isel(x=slice(-5, None)), self.dv.tail(x=5)) assert_equal(self.dv.isel(x=slice(0)), self.dv.tail(x=0)) assert_equal( @@ -1098,7 +1113,7 @@ def test_tail(self): with pytest.raises(ValueError, match=r"expected positive int"): self.dv.tail(-3) - def test_thin(self): + def test_thin(self) -> None: assert_equal(self.dv.isel(x=slice(None, None, 5)), self.dv.thin(x=5)) assert_equal( self.dv.isel({dim: slice(None, None, 6) for dim in self.dv.dims}), @@ -1113,10 +1128,11 @@ def test_thin(self): with pytest.raises(ValueError, match=r"cannot be zero"): self.dv.thin(time=0) - def test_loc(self): + def test_loc(self) -> None: self.ds["x"] = ("x", np.array(list("abcdefghij"))) da = self.ds["foo"] - assert_identical(da[:3], da.loc[:"c"]) + # typing issue: see https://github.com/python/mypy/issues/2410 + assert_identical(da[:3], da.loc[:"c"]) # type: ignore[misc] assert_identical(da[1], da.loc["b"]) assert_identical(da[1], da.loc[{"x": "b"}]) assert_identical(da[1], da.loc["b", ...]) @@ -1124,17 +1140,18 @@ def test_loc(self): assert_identical(da[:3, :4], da.loc[["a", "b", "c"], np.arange(4)]) assert_identical(da[:, :4], da.loc[:, self.ds["y"] < 4]) - def test_loc_datetime64_value(self): + def test_loc_datetime64_value(self) -> None: # regression test for https://github.com/pydata/xarray/issues/4283 t = np.array(["2017-09-05T12", "2017-09-05T15"], dtype="datetime64[ns]") array = DataArray(np.ones(t.shape), dims=("time",), coords=(t,)) assert_identical(array.loc[{"time": t[0]}], array[0]) - def test_loc_assign(self): + def test_loc_assign(self) -> None: self.ds["x"] = ("x", np.array(list("abcdefghij"))) da = self.ds["foo"] # assignment - da.loc["a":"j"] = 0 + # typing 
issue: see https://github.com/python/mypy/issues/2410 + da.loc["a":"j"] = 0 # type: ignore[misc] assert np.all(da.values == 0) da.loc[{"x": slice("a", "j")}] = 2 assert np.all(da.values == 2) @@ -1153,7 +1170,7 @@ def test_loc_assign(self): assert np.all(da.values[0] == np.zeros(4)) assert da.values[1, 0] != 0 - def test_loc_assign_dataarray(self): + def test_loc_assign_dataarray(self) -> None: def get_data(): return DataArray( np.ones((4, 3, 2)), @@ -1199,12 +1216,12 @@ def get_data(): assert_identical(da["x"], get_data()["x"]) assert_identical(da["non-dim"], get_data()["non-dim"]) - def test_loc_single_boolean(self): + def test_loc_single_boolean(self) -> None: data = DataArray([0, 1], coords=[[True, False]]) assert data.loc[True] == 0 assert data.loc[False] == 1 - def test_loc_dim_name_collision_with_sel_params(self): + def test_loc_dim_name_collision_with_sel_params(self) -> None: da = xr.DataArray( [[0, 0], [1, 1]], dims=["dim1", "method"], @@ -1214,13 +1231,15 @@ def test_loc_dim_name_collision_with_sel_params(self): da.loc[dict(dim1=["x", "y"], method=["a"])], [[0], [1]] ) - def test_selection_multiindex(self): + def test_selection_multiindex(self) -> None: mindex = pd.MultiIndex.from_product( [["a", "b"], [1, 2], [-1, -2]], names=("one", "two", "three") ) mdata = DataArray(range(8), [("x", mindex)]) - def test_sel(lab_indexer, pos_indexer, replaced_idx=False, renamed_dim=None): + def test_sel( + lab_indexer, pos_indexer, replaced_idx=False, renamed_dim=None + ) -> None: da = mdata.sel(x=lab_indexer) expected_da = mdata.isel(x=pos_indexer) if not replaced_idx: @@ -1252,7 +1271,7 @@ def test_sel(lab_indexer, pos_indexer, replaced_idx=False, renamed_dim=None): assert_identical(mdata.sel(x={"one": "a", "two": 1}), mdata.sel(one="a", two=1)) - def test_selection_multiindex_remove_unused(self): + def test_selection_multiindex_remove_unused(self) -> None: # GH2619. For MultiIndex, we need to call remove_unused. 
ds = xr.DataArray( np.arange(40).reshape(8, 5), @@ -1269,7 +1288,7 @@ def test_selection_multiindex_remove_unused(self): expected = expected.set_index(xy=["x", "y"]).unstack() assert_identical(expected, actual) - def test_selection_multiindex_from_level(self): + def test_selection_multiindex_from_level(self) -> None: # GH: 3512 da = DataArray([0, 1], dims=["x"], coords={"x": [0, 1], "y": "a"}) db = DataArray([2, 3], dims=["x"], coords={"x": [0, 1], "y": "b"}) @@ -1279,20 +1298,20 @@ def test_selection_multiindex_from_level(self): expected = data.isel(xy=[0, 1]).unstack("xy").squeeze("y") assert_equal(actual, expected) - def test_virtual_default_coords(self): + def test_virtual_default_coords(self) -> None: array = DataArray(np.zeros((5,)), dims="x") expected = DataArray(range(5), dims="x", name="x") assert_identical(expected, array["x"]) assert_identical(expected, array.coords["x"]) - def test_virtual_time_components(self): + def test_virtual_time_components(self) -> None: dates = pd.date_range("2000-01-01", periods=10) da = DataArray(np.arange(1, 11), [("time", dates)]) assert_array_equal(da["time.dayofyear"], da.values) assert_array_equal(da.coords["time.dayofyear"], da.values) - def test_coords(self): + def test_coords(self) -> None: # use int64 to ensure repr() consistency on windows coords = [ IndexVariable("x", np.array([-1, -2], "int64")), @@ -1316,14 +1335,14 @@ def test_coords(self): with pytest.raises(KeyError): da.coords["foo"] - expected = dedent( + expected_repr = dedent( """\ Coordinates: * x (x) int64 -1 -2 * y (y) int64 0 1 2""" ) actual = repr(da.coords) - assert expected == actual + assert expected_repr == actual del da.coords["x"] da._indexes = filter_indexes_from_coords(da.xindexes, set(da.coords)) @@ -1336,7 +1355,7 @@ def test_coords(self): self.mda["level_1"] = ("x", np.arange(4)) self.mda.coords["level_1"] = ("x", np.arange(4)) - def test_coords_to_index(self): + def test_coords_to_index(self) -> None: da = DataArray(np.zeros((2, 3)), [("x", [1, 2]), ("y", list("abc"))]) with pytest.raises(ValueError, match=r"no valid index"): @@ -1361,7 +1380,7 @@ def test_coords_to_index(self): with pytest.raises(ValueError, match=r"ordered_dims must match"): da.coords.to_index(["x"]) - def test_coord_coords(self): + def test_coord_coords(self) -> None: orig = DataArray( [10, 20], {"x": [1, 2], "x2": ("x", ["a", "b"]), "z": 4}, dims="x" ) @@ -1381,7 +1400,7 @@ def test_coord_coords(self): ) assert_identical(expected, actual) - def test_reset_coords(self): + def test_reset_coords(self) -> None: data = DataArray( np.zeros((3, 4)), {"bar": ("x", ["a", "b", "c"]), "baz": ("y", range(4)), "y": range(4)}, @@ -1389,8 +1408,8 @@ def test_reset_coords(self): name="foo", ) - actual = data.reset_coords() - expected = Dataset( + actual1 = data.reset_coords() + expected1 = Dataset( { "foo": (["x", "y"], np.zeros((3, 4))), "bar": ("x", ["a", "b", "c"]), @@ -1398,39 +1417,38 @@ def test_reset_coords(self): "y": range(4), } ) - assert_identical(actual, expected) + assert_identical(actual1, expected1) - actual = data.reset_coords(["bar", "baz"]) - assert_identical(actual, expected) + actual2 = data.reset_coords(["bar", "baz"]) + assert_identical(actual2, expected1) - actual = data.reset_coords("bar") - expected = Dataset( + actual3 = data.reset_coords("bar") + expected3 = Dataset( {"foo": (["x", "y"], np.zeros((3, 4))), "bar": ("x", ["a", "b", "c"])}, {"baz": ("y", range(4)), "y": range(4)}, ) - assert_identical(actual, expected) + assert_identical(actual3, expected3) - actual = 
data.reset_coords(["bar"]) - assert_identical(actual, expected) + actual4 = data.reset_coords(["bar"]) + assert_identical(actual4, expected3) - actual = data.reset_coords(drop=True) - expected = DataArray( + actual5 = data.reset_coords(drop=True) + expected5 = DataArray( np.zeros((3, 4)), coords={"y": range(4)}, dims=["x", "y"], name="foo" ) - assert_identical(actual, expected) + assert_identical(actual5, expected5) - actual = data.copy() - actual = actual.reset_coords(drop=True) - assert_identical(actual, expected) + actual6 = data.copy().reset_coords(drop=True) + assert_identical(actual6, expected5) - actual = data.reset_coords("bar", drop=True) - expected = DataArray( + actual7 = data.reset_coords("bar", drop=True) + expected7 = DataArray( np.zeros((3, 4)), {"baz": ("y", range(4)), "y": range(4)}, dims=["x", "y"], name="foo", ) - assert_identical(actual, expected) + assert_identical(actual7, expected7) with pytest.raises(ValueError, match=r"cannot be found"): data.reset_coords("foo", drop=True) @@ -1445,7 +1463,7 @@ def test_reset_coords(self): with pytest.raises(ValueError, match=r"cannot remove index"): data.reset_coords("lvl1") - def test_assign_coords(self): + def test_assign_coords(self) -> None: array = DataArray(10) actual = array.assign_coords(c=42) expected = DataArray(10, {"c": 42}) @@ -1465,7 +1483,7 @@ def test_assign_coords(self): with pytest.raises(ValueError): da.coords["x"] = ("y", [1, 2, 3]) # no new dimension to a DataArray - def test_coords_alignment(self): + def test_coords_alignment(self) -> None: lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])]) rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs.coords["rhs"] = rhs @@ -1475,18 +1493,18 @@ def test_coords_alignment(self): ) assert_identical(lhs, expected) - def test_set_coords_update_index(self): + def test_set_coords_update_index(self) -> None: actual = DataArray([1, 2, 3], [("x", [1, 2, 3])]) actual.coords["x"] = ["a", "b", "c"] assert actual.xindexes["x"].to_pandas_index().equals(pd.Index(["a", "b", "c"])) - def test_set_coords_multiindex_level(self): + def test_set_coords_multiindex_level(self) -> None: with pytest.raises( ValueError, match=r"cannot set or update variable.*corrupt.*index " ): self.mda["level_1"] = range(4) - def test_coords_replacement_alignment(self): + def test_coords_replacement_alignment(self) -> None: # regression test for GH725 arr = DataArray([0, 1, 2], dims=["abc"]) new_coord = DataArray([1, 2, 3], dims=["abc"], coords=[[1, 2, 3]]) @@ -1494,25 +1512,25 @@ def test_coords_replacement_alignment(self): expected = DataArray([0, 1, 2], coords=[("abc", [1, 2, 3])]) assert_identical(arr, expected) - def test_coords_non_string(self): + def test_coords_non_string(self) -> None: arr = DataArray(0, coords={1: 2}) actual = arr.coords[1] expected = DataArray(2, coords={1: 2}, name=1) assert_identical(actual, expected) - def test_coords_delitem_delete_indexes(self): + def test_coords_delitem_delete_indexes(self) -> None: # regression test for GH3746 arr = DataArray(np.ones((2,)), dims="x", coords={"x": [0, 1]}) del arr.coords["x"] assert "x" not in arr.xindexes - def test_coords_delitem_multiindex_level(self): + def test_coords_delitem_multiindex_level(self) -> None: with pytest.raises( ValueError, match=r"cannot remove coordinate.*corrupt.*index " ): del self.mda.coords["level_1"] - def test_broadcast_like(self): + def test_broadcast_like(self) -> None: arr1 = DataArray( np.ones((2, 3)), dims=["x", "y"], @@ -1537,7 +1555,7 @@ def test_broadcast_like(self): assert_identical(orig3.broadcast_like(orig4), 
new3.transpose("y", "x")) assert_identical(orig4.broadcast_like(orig3), new4) - def test_reindex_like(self): + def test_reindex_like(self) -> None: foo = DataArray(np.random.randn(5, 6), [("x", range(5)), ("y", range(6))]) bar = foo[:2, :2] assert_identical(foo.reindex_like(bar), bar) @@ -1547,7 +1565,7 @@ def test_reindex_like(self): expected[:2, :2] = bar assert_identical(bar.reindex_like(foo), expected) - def test_reindex_like_no_index(self): + def test_reindex_like_no_index(self) -> None: foo = DataArray(np.random.randn(5, 6), dims=["x", "y"]) assert_identical(foo, foo.reindex_like(foo)) @@ -1555,15 +1573,15 @@ def test_reindex_like_no_index(self): with pytest.raises(ValueError, match=r"different size for unlabeled"): foo.reindex_like(bar) - def test_reindex_regressions(self): + def test_reindex_regressions(self) -> None: da = DataArray(np.random.randn(5), coords=[("time", range(5))]) time2 = DataArray(np.arange(5), dims="time2") with pytest.raises(ValueError): da.reindex(time=time2) # regression test for #736, reindex can not change complex nums dtype - x = np.array([1, 2, 3], dtype=complex) - x = DataArray(x, coords=[[0.1, 0.2, 0.3]]) + xnp = np.array([1, 2, 3], dtype=complex) + x = DataArray(xnp, coords=[[0.1, 0.2, 0.3]]) y = DataArray([2, 5, 6, 7, 8], coords=[[-1.1, 0.21, 0.31, 0.41, 0.51]]) re_dtype = x.reindex_like(y, method="pad").dtype assert x.dtype == re_dtype @@ -1585,7 +1603,7 @@ def test_reindex_method(self) -> None: assert_identical(expected, actual) @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {None: 2, "u": 1}]) - def test_reindex_fill_value(self, fill_value): + def test_reindex_fill_value(self, fill_value) -> None: x = DataArray([10, 20], dims="y", coords={"y": [0, 1], "u": ("y", [1, 2])}) y = [0, 1, 2] if fill_value == dtypes.NA: @@ -1606,7 +1624,7 @@ def test_reindex_fill_value(self, fill_value): assert_identical(expected, actual) @pytest.mark.parametrize("dtype", [str, bytes]) - def test_reindex_str_dtype(self, dtype): + def test_reindex_str_dtype(self, dtype) -> None: data = DataArray( [1, 2], dims="x", coords={"x": np.array(["a", "b"], dtype=dtype)} @@ -1618,7 +1636,7 @@ def test_reindex_str_dtype(self, dtype): assert_identical(expected, actual) assert actual.dtype == expected.dtype - def test_rename(self): + def test_rename(self) -> None: renamed = self.dv.rename("bar") assert_identical(renamed.to_dataset(), self.ds.rename({"foo": "bar"})) assert renamed.name == "bar" @@ -1631,7 +1649,7 @@ def test_rename(self): renamed_kwargs = self.dv.x.rename(x="z").rename("z") assert_identical(renamed, renamed_kwargs) - def test_init_value(self): + def test_init_value(self) -> None: expected = DataArray( np.full((3, 4), 3), dims=["x", "y"], coords=[range(3), range(4)] ) @@ -1657,7 +1675,7 @@ def test_init_value(self): with pytest.raises(ValueError, match=r"does not match the 0 dim"): DataArray(np.array(1), coords=[("x", np.arange(10))]) - def test_swap_dims(self): + def test_swap_dims(self) -> None: array = DataArray(np.random.randn(3), {"x": list("abc")}, "x") expected = DataArray(array.values, {"x": ("y", list("abc"))}, dims="y") actual = array.swap_dims({"x": "y"}) @@ -1682,7 +1700,7 @@ def test_swap_dims(self): for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): assert actual.xindexes[dim_name].equals(expected.xindexes[dim_name]) - def test_expand_dims_error(self): + def test_expand_dims_error(self) -> None: array = DataArray( np.random.randn(3, 4), dims=["x", "dim_0"], @@ -1690,7 +1708,7 @@ def test_expand_dims_error(self): 
attrs={"key": "entry"}, ) - with pytest.raises(TypeError, match=r"dim should be hashable or"): + with pytest.raises(TypeError, match=r"dim should be Hashable or"): array.expand_dims(0) with pytest.raises(ValueError, match=r"lengths of dim and axis"): # dims and axis argument should be the same length @@ -1728,7 +1746,7 @@ def test_expand_dims_error(self): with pytest.raises(ValueError): array.expand_dims({"d": 4}, e=4) - def test_expand_dims(self): + def test_expand_dims(self) -> None: array = DataArray( np.random.randn(3, 4), dims=["x", "dim_0"], @@ -1786,7 +1804,7 @@ def test_expand_dims(self): roundtripped = actual.squeeze(["y", "z"], drop=True) assert_identical(array, roundtripped) - def test_expand_dims_with_scalar_coordinate(self): + def test_expand_dims_with_scalar_coordinate(self) -> None: array = DataArray( np.random.randn(3, 4), dims=["x", "dim_0"], @@ -1804,7 +1822,7 @@ def test_expand_dims_with_scalar_coordinate(self): roundtripped = actual.squeeze(["z"], drop=False) assert_identical(array, roundtripped) - def test_expand_dims_with_greater_dim_size(self): + def test_expand_dims_with_greater_dim_size(self) -> None: array = DataArray( np.random.randn(3, 4), dims=["x", "dim_0"], @@ -1845,7 +1863,7 @@ def test_expand_dims_with_greater_dim_size(self): ).drop_vars("dim_0") assert_identical(other_way_expected, other_way) - def test_set_index(self): + def test_set_index(self) -> None: indexes = [self.mindex.get_level_values(n) for n in self.mindex.names] coords = {idx.name: ("x", idx) for idx in indexes} array = DataArray(self.mda.values, coords=coords, dims="x") @@ -1876,7 +1894,7 @@ def test_set_index(self): with pytest.raises(ValueError, match=r".*variable\(s\) do not exist"): obj.set_index(x="level_4") - def test_reset_index(self): + def test_reset_index(self) -> None: indexes = [self.mindex.get_level_values(n) for n in self.mindex.names] coords = {idx.name: ("x", idx) for idx in indexes} coords["x"] = ("x", self.mindex.values) @@ -1913,14 +1931,14 @@ def test_reset_index(self): assert_identical(obj, array, check_default_indexes=False) assert len(obj.xindexes) == 0 - def test_reset_index_keep_attrs(self): + def test_reset_index_keep_attrs(self) -> None: coord_1 = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True}) da = DataArray([1, 0], [coord_1]) obj = da.reset_index("coord_1") assert_identical(obj, da, check_default_indexes=False) assert len(obj.xindexes) == 0 - def test_reorder_levels(self): + def test_reorder_levels(self) -> None: midx = self.mindex.reorder_levels(["level_2", "level_1"]) expected = DataArray(self.mda.values, coords={"x": midx}, dims="x") @@ -1935,11 +1953,11 @@ def test_reorder_levels(self): with pytest.raises(ValueError, match=r"has no MultiIndex"): array.reorder_levels(x=["level_1", "level_2"]) - def test_dataset_getitem(self): + def test_dataset_getitem(self) -> None: dv = self.ds["foo"] assert_identical(dv, self.dv) - def test_array_interface(self): + def test_array_interface(self) -> None: assert_array_equal(np.asarray(self.dv), self.x) # test patched in methods assert_array_equal(self.dv.astype(float), self.v.astype(float)) @@ -1953,27 +1971,27 @@ def test_array_interface(self): bar = Variable(["x", "y"], np.zeros((10, 20))) assert_equal(self.dv, np.maximum(self.dv, bar)) - def test_astype_attrs(self): + def test_astype_attrs(self) -> None: for v in [self.va.copy(), self.mda.copy(), self.ds.copy()]: v.attrs["foo"] = "bar" assert v.attrs == v.astype(float).attrs assert not v.astype(float, keep_attrs=False).attrs - def test_astype_dtype(self): + def 
test_astype_dtype(self) -> None: original = DataArray([-1, 1, 2, 3, 1000]) converted = original.astype(float) assert_array_equal(original, converted) assert np.issubdtype(original.dtype, np.integer) assert np.issubdtype(converted.dtype, np.floating) - def test_astype_order(self): + def test_astype_order(self) -> None: original = DataArray([[1, 2], [3, 4]]) converted = original.astype("d", order="F") assert_equal(original, converted) assert original.values.flags["C_CONTIGUOUS"] assert converted.values.flags["F_CONTIGUOUS"] - def test_astype_subok(self): + def test_astype_subok(self) -> None: class NdArraySubclass(np.ndarray): pass @@ -1986,7 +2004,7 @@ class NdArraySubclass(np.ndarray): assert not isinstance(converted_not_subok.data, NdArraySubclass) assert isinstance(converted_subok.data, NdArraySubclass) - def test_is_null(self): + def test_is_null(self) -> None: x = np.random.RandomState(42).randn(5, 6) x[x < 0] = np.nan original = DataArray(x, [-np.arange(5), np.arange(6)], ["x", "y"]) @@ -1994,7 +2012,7 @@ def test_is_null(self): assert_identical(expected, original.isnull()) assert_identical(~expected, original.notnull()) - def test_math(self): + def test_math(self) -> None: x = self.x v = self.v a = self.dv @@ -2010,25 +2028,25 @@ def test_math(self): assert_equal(a, a + 0 * a) assert_equal(a, 0 * a + a) - def test_math_automatic_alignment(self): + def test_math_automatic_alignment(self) -> None: a = DataArray(range(5), [("x", range(5))]) b = DataArray(range(5), [("x", range(1, 6))]) expected = DataArray(np.ones(4), [("x", [1, 2, 3, 4])]) assert_identical(a - b, expected) - def test_non_overlapping_dataarrays_return_empty_result(self): + def test_non_overlapping_dataarrays_return_empty_result(self) -> None: a = DataArray(range(5), [("x", range(5))]) result = a.isel(x=slice(2)) + a.isel(x=slice(2, None)) assert len(result["x"]) == 0 - def test_empty_dataarrays_return_empty_result(self): + def test_empty_dataarrays_return_empty_result(self) -> None: a = DataArray(data=[]) result = a * a assert len(result["dim_0"]) == 0 - def test_inplace_math_basics(self): + def test_inplace_math_basics(self) -> None: x = self.x a = self.dv v = a.variable @@ -2039,7 +2057,7 @@ def test_inplace_math_basics(self): assert_array_equal(b.values, x) assert source_ndarray(b.values) is x - def test_inplace_math_error(self): + def test_inplace_math_error(self) -> None: data = np.random.rand(4) times = np.arange(4) foo = DataArray(data, coords=[times], dims=["time"]) @@ -2051,7 +2069,7 @@ def test_inplace_math_error(self): # Check error throwing prevented inplace operation assert_array_equal(foo.coords["time"], b) - def test_inplace_math_automatic_alignment(self): + def test_inplace_math_automatic_alignment(self) -> None: a = DataArray(range(5), [("x", range(5))]) b = DataArray(range(1, 6), [("x", range(1, 6))]) with pytest.raises(xr.MergeError, match="Automatic alignment is not supported"): @@ -2059,7 +2077,7 @@ def test_inplace_math_automatic_alignment(self): with pytest.raises(xr.MergeError, match="Automatic alignment is not supported"): b += a - def test_math_name(self): + def test_math_name(self) -> None: # Verify that name is preserved only when it can be done unambiguously. 
# The rule (copied from pandas.Series) is keep the current name only if # the other object has the same name or no name attribute and this @@ -2074,7 +2092,7 @@ def test_math_name(self): assert (a["x"] + 0).name == "x" assert (a + a["x"]).name is None - def test_math_with_coords(self): + def test_math_with_coords(self) -> None: coords = { "x": [-1, -2], "y": ["ab", "cd", "ef"], @@ -2129,7 +2147,7 @@ def test_math_with_coords(self): actual = alt + orig assert_identical(expected, actual) - def test_index_math(self): + def test_index_math(self) -> None: orig = DataArray(range(3), dims="x", name="x") actual = orig + 1 expected = DataArray(1 + np.arange(3), dims="x", name="x") @@ -2143,20 +2161,20 @@ def test_index_math(self): actual = orig > orig[0] assert_identical(expected, actual) - def test_dataset_math(self): + def test_dataset_math(self) -> None: # more comprehensive tests with multiple dataset variables obs = Dataset( {"tmin": ("x", np.arange(5)), "tmax": ("x", 10 + np.arange(5))}, {"x": ("x", 0.5 * np.arange(5)), "loc": ("x", range(-2, 3))}, ) - actual = 2 * obs["tmax"] - expected = DataArray(2 * (10 + np.arange(5)), obs.coords, name="tmax") - assert_identical(actual, expected) + actual1 = 2 * obs["tmax"] + expected1 = DataArray(2 * (10 + np.arange(5)), obs.coords, name="tmax") + assert_identical(actual1, expected1) - actual = obs["tmax"] - obs["tmin"] - expected = DataArray(10 * np.ones(5), obs.coords) - assert_identical(actual, expected) + actual2 = obs["tmax"] - obs["tmin"] + expected2 = DataArray(10 * np.ones(5), obs.coords) + assert_identical(actual2, expected2) sim = Dataset( { @@ -2167,29 +2185,29 @@ def test_dataset_math(self): } ) - actual = sim["tmin"] - obs["tmin"] - expected = DataArray(np.ones(5), obs.coords, name="tmin") - assert_identical(actual, expected) + actual3 = sim["tmin"] - obs["tmin"] + expected3 = DataArray(np.ones(5), obs.coords, name="tmin") + assert_identical(actual3, expected3) - actual = -obs["tmin"] + sim["tmin"] - assert_identical(actual, expected) + actual4 = -obs["tmin"] + sim["tmin"] + assert_identical(actual4, expected3) - actual = sim["tmin"].copy() - actual -= obs["tmin"] - assert_identical(actual, expected) + actual5 = sim["tmin"].copy() + actual5 -= obs["tmin"] + assert_identical(actual5, expected3) - actual = sim.copy() - actual["tmin"] = sim["tmin"] - obs["tmin"] - expected = Dataset( + actual6 = sim.copy() + actual6["tmin"] = sim["tmin"] - obs["tmin"] + expected6 = Dataset( {"tmin": ("x", np.ones(5)), "tmax": ("x", sim["tmax"].values)}, obs.coords ) - assert_identical(actual, expected) + assert_identical(actual6, expected6) - actual = sim.copy() - actual["tmin"] -= obs["tmin"] - assert_identical(actual, expected) + actual7 = sim.copy() + actual7["tmin"] -= obs["tmin"] + assert_identical(actual7, expected6) - def test_stack_unstack(self): + def test_stack_unstack(self) -> None: orig = DataArray( [[0, 1], [2, 3]], dims=["x", "y"], @@ -2229,7 +2247,7 @@ def test_stack_unstack(self): unstacked = stacked.unstack() assert_identical(orig, unstacked.transpose(*dims)) - def test_stack_unstack_decreasing_coordinate(self): + def test_stack_unstack_decreasing_coordinate(self) -> None: # regression test for GH980 orig = DataArray( np.random.rand(3, 4), @@ -2240,25 +2258,25 @@ def test_stack_unstack_decreasing_coordinate(self): actual = stacked.unstack("allpoints") assert_identical(orig, actual) - def test_unstack_pandas_consistency(self): + def test_unstack_pandas_consistency(self) -> None: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 
0, 1]}) s = df.set_index(["x", "y"])["foo"] expected = DataArray(s.unstack(), name="foo") actual = DataArray(s, dims="z").unstack("z") assert_identical(expected, actual) - def test_stack_nonunique_consistency(self, da): + def test_stack_nonunique_consistency(self, da) -> None: da = da.isel(time=0, drop=True) # 2D actual = da.stack(z=["a", "x"]) expected = DataArray(da.to_pandas().stack(), dims="z") assert_identical(expected, actual) - def test_to_unstacked_dataset_raises_value_error(self): + def test_to_unstacked_dataset_raises_value_error(self) -> None: data = DataArray([0, 1], dims="x", coords={"x": [0, 1]}) with pytest.raises(ValueError, match="'x' is not a stacked coordinate"): data.to_unstacked_dataset("x", 0) - def test_transpose(self): + def test_transpose(self) -> None: da = DataArray( np.random.randn(3, 4, 5), dims=("x", "y", "z"), @@ -2306,10 +2324,10 @@ def test_transpose(self): with pytest.warns(UserWarning): da.transpose("not_a_dim", "y", "x", ..., missing_dims="warn") - def test_squeeze(self): + def test_squeeze(self) -> None: assert_equal(self.dv.variable.squeeze(), self.dv.squeeze().variable) - def test_squeeze_drop(self): + def test_squeeze_drop(self) -> None: array = DataArray([1], [("x", [0])]) expected = DataArray(1) actual = array.squeeze(drop=True) @@ -2333,7 +2351,7 @@ def test_squeeze_drop(self): with pytest.raises(ValueError): array.squeeze(axis=0, dim="dim_1") - def test_drop_coordinates(self): + def test_drop_coordinates(self) -> None: expected = DataArray(np.random.randn(2, 3), dims=["x", "y"]) arr = expected.copy() arr.coords["z"] = 2 @@ -2359,21 +2377,21 @@ def test_drop_coordinates(self): actual = renamed.drop_vars("foo", errors="ignore") assert_identical(actual, renamed) - def test_drop_multiindex_level(self): + def test_drop_multiindex_level(self) -> None: # GH6505 expected = self.mda.drop_vars(["x", "level_1", "level_2"]) with pytest.warns(DeprecationWarning): actual = self.mda.drop_vars("level_1") assert_identical(expected, actual) - def test_drop_all_multiindex_levels(self): + def test_drop_all_multiindex_levels(self) -> None: dim_levels = ["x", "level_1", "level_2"] actual = self.mda.drop_vars(dim_levels) # no error, multi-index dropped for key in dim_levels: assert key not in actual.xindexes - def test_drop_index_labels(self): + def test_drop_index_labels(self) -> None: arr = DataArray(np.random.randn(2, 3), coords={"y": [0, 1, 2]}, dims=["x", "y"]) actual = arr.drop_sel(y=[0, 1]) expected = arr[:, 2:] @@ -2386,15 +2404,15 @@ def test_drop_index_labels(self): assert_identical(actual, expected) with pytest.warns(DeprecationWarning): - arr.drop([0, 1, 3], dim="y", errors="ignore") + arr.drop([0, 1, 3], dim="y", errors="ignore") # type: ignore - def test_drop_index_positions(self): + def test_drop_index_positions(self) -> None: arr = DataArray(np.random.randn(2, 3), dims=["x", "y"]) actual = arr.drop_isel(y=[0, 1]) expected = arr[:, 2:] assert_identical(actual, expected) - def test_dropna(self): + def test_dropna(self) -> None: x = np.random.randn(4, 4) x[::2, 0] = np.nan arr = DataArray(x, dims=["a", "b"]) @@ -2413,25 +2431,25 @@ def test_dropna(self): expected = arr[:, 1:] assert_identical(actual, expected) - def test_where(self): + def test_where(self) -> None: arr = DataArray(np.arange(4), dims="x") expected = arr.sel(x=slice(2)) actual = arr.where(arr.x < 2, drop=True) assert_identical(actual, expected) - def test_where_lambda(self): + def test_where_lambda(self) -> None: arr = DataArray(np.arange(4), dims="y") expected = arr.sel(y=slice(2)) actual = 
arr.where(lambda x: x.y < 2, drop=True) assert_identical(actual, expected) - def test_where_string(self): + def test_where_string(self) -> None: array = DataArray(["a", "b"]) expected = DataArray(np.array(["a", np.nan], dtype=object)) actual = array.where([True, False]) assert_identical(actual, expected) - def test_cumops(self): + def test_cumops(self) -> None: coords = { "x": [-1, -2], "y": ["ab", "cd", "ef"], @@ -2532,7 +2550,7 @@ def test_reduce_keepdims(self) -> None: assert_equal(actual, expected) @requires_bottleneck - def test_reduce_keepdims_bottleneck(self): + def test_reduce_keepdims_bottleneck(self) -> None: import bottleneck coords = { @@ -2548,7 +2566,7 @@ def test_reduce_keepdims_bottleneck(self): expected = orig.mean(keepdims=True) assert_equal(actual, expected) - def test_reduce_dtype(self): + def test_reduce_dtype(self) -> None: coords = { "x": [-1, -2], "y": ["ab", "cd", "ef"], @@ -2560,7 +2578,7 @@ def test_reduce_dtype(self): for dtype in [np.float16, np.float32, np.float64]: assert orig.astype(float).mean(dtype=dtype).dtype == dtype - def test_reduce_out(self): + def test_reduce_out(self) -> None: coords = { "x": [-1, -2], "y": ["ab", "cd", "ef"], @@ -2625,7 +2643,7 @@ def test_quantile_interpolation_deprecated(self, method) -> None: with pytest.raises(TypeError, match="interpolation and method keywords"): da.quantile(q, method=method, interpolation=method) - def test_reduce_keep_attrs(self): + def test_reduce_keep_attrs(self) -> None: # Test dropped attrs vm = self.va.mean() assert len(vm.attrs) == 0 @@ -2636,7 +2654,7 @@ def test_reduce_keep_attrs(self): assert len(vm.attrs) == len(self.attrs) assert vm.attrs == self.attrs - def test_assign_attrs(self): + def test_assign_attrs(self) -> None: expected = DataArray([], attrs=dict(a=1, b=2)) expected.attrs["a"] = 1 expected.attrs["b"] = 2 @@ -2653,7 +2671,7 @@ def test_assign_attrs(self): @pytest.mark.parametrize( "func", [lambda x: x.clip(0, 1), lambda x: np.float64(1.0) * x, np.abs, abs] ) - def test_propagate_attrs(self, func): + def test_propagate_attrs(self, func) -> None: da = DataArray(self.va) # test defaults @@ -2665,7 +2683,7 @@ def test_propagate_attrs(self, func): with set_options(keep_attrs=True): assert func(da).attrs == da.attrs - def test_fillna(self): + def test_fillna(self) -> None: a = DataArray([np.nan, 1, np.nan, 3], coords={"x": range(4)}, dims="x") actual = a.fillna(-1) expected = DataArray([-1, 1, -1, 3], coords={"x": range(4)}, dims="x") @@ -2691,7 +2709,7 @@ def test_fillna(self): with pytest.raises(ValueError, match=r"broadcast"): a.fillna([1, 2]) - def test_align(self): + def test_align(self) -> None: array = DataArray( np.random.random((6, 8)), coords={"x": list("abcdef")}, dims=["x", "y"] ) @@ -2699,7 +2717,7 @@ def test_align(self): assert_identical(array1, array[:5]) assert_identical(array2, array[:5]) - def test_align_dtype(self): + def test_align_dtype(self) -> None: # regression test for #264 x1 = np.arange(30) x2 = np.arange(5, 35) @@ -2708,7 +2726,7 @@ def test_align_dtype(self): c, d = align(a, b, join="outer") assert c.dtype == np.float32 - def test_align_copy(self): + def test_align_copy(self) -> None: x = DataArray([1, 2, 3], coords=[("a", [1, 2, 3])]) y = DataArray([1, 2], coords=[("a", [3, 1])]) @@ -2735,7 +2753,7 @@ def test_align_copy(self): assert_identical(x, x2) assert source_ndarray(x2.data) is not source_ndarray(x.data) - def test_align_override(self): + def test_align_override(self) -> None: left = DataArray([1, 2, 3], dims="x", coords={"x": [0, 1, 2]}) right = 
DataArray( np.arange(9).reshape((3, 3)), @@ -2783,13 +2801,13 @@ def test_align_override(self): ], ], ) - def test_align_override_error(self, darrays): + def test_align_override_error(self, darrays) -> None: with pytest.raises( ValueError, match=r"cannot align.*join.*override.*same size" ): xr.align(*darrays, join="override") - def test_align_exclude(self): + def test_align_exclude(self) -> None: x = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, -2]), ("b", [3, 4])]) y = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, 20]), ("b", [5, 6])]) z = DataArray([1], dims=["a"], coords={"a": [20], "b": 7}) @@ -2810,7 +2828,7 @@ def test_align_exclude(self): assert_identical(expected_y2, y2) assert_identical(expected_z2, z2) - def test_align_indexes(self): + def test_align_indexes(self) -> None: x = DataArray([1, 2, 3], coords=[("a", [-1, 10, -2])]) y = DataArray([1, 2], coords=[("a", [-2, -1])]) @@ -2824,13 +2842,13 @@ def test_align_indexes(self): expected_x2 = DataArray([3, np.nan, 2, 1], coords=[("a", [-2, 7, 10, -1])]) assert_identical(expected_x2, x2) - def test_align_without_indexes_exclude(self): + def test_align_without_indexes_exclude(self) -> None: arrays = [DataArray([1, 2, 3], dims=["x"]), DataArray([1, 2], dims=["x"])] result0, result1 = align(*arrays, exclude=["x"]) assert_identical(result0, arrays[0]) assert_identical(result1, arrays[1]) - def test_align_mixed_indexes(self): + def test_align_mixed_indexes(self) -> None: array_no_coord = DataArray([1, 2], dims=["x"]) array_with_coord = DataArray([1, 2], coords=[("x", ["a", "b"])]) result0, result1 = align(array_no_coord, array_with_coord) @@ -2841,7 +2859,7 @@ def test_align_mixed_indexes(self): assert_identical(result0, array_no_coord) assert_identical(result1, array_with_coord) - def test_align_without_indexes_errors(self): + def test_align_without_indexes_errors(self) -> None: with pytest.raises( ValueError, match=r"cannot.*align.*dimension.*conflicting.*sizes.*", @@ -2857,7 +2875,7 @@ def test_align_without_indexes_errors(self): DataArray([1, 2], coords=[("x", [0, 1])]), ) - def test_align_str_dtype(self): + def test_align_str_dtype(self) -> None: a = DataArray([0, 1], dims=["x"], coords={"x": ["a", "b"]}) b = DataArray([1, 2], dims=["x"], coords={"x": ["b", "c"]}) @@ -2877,7 +2895,7 @@ def test_align_str_dtype(self): assert_identical(expected_b, actual_b) assert expected_b.x.dtype == actual_b.x.dtype - def test_broadcast_arrays(self): + def test_broadcast_arrays(self) -> None: x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x") y = DataArray([1, 2], coords=[("b", [3, 4])], name="y") x2, y2 = broadcast(x, y) @@ -2895,7 +2913,7 @@ def test_broadcast_arrays(self): assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) - def test_broadcast_arrays_misaligned(self): + def test_broadcast_arrays_misaligned(self) -> None: # broadcast on misaligned coords must auto-align x = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, -2]), ("b", [3, 4])]) y = DataArray([1, 2], coords=[("a", [-1, 20])]) @@ -2911,7 +2929,7 @@ def test_broadcast_arrays_misaligned(self): assert_identical(expected_x2, x2) assert_identical(expected_y2, y2) - def test_broadcast_arrays_nocopy(self): + def test_broadcast_arrays_nocopy(self) -> None: # Test that input data is not copied over in case # no alteration is needed x = DataArray([1, 2], coords=[("a", [-1, -2])], name="x") @@ -2929,7 +2947,7 @@ def test_broadcast_arrays_nocopy(self): assert_identical(x, x2) assert source_ndarray(x2.data) is source_ndarray(x.data) - def 
test_broadcast_arrays_exclude(self): + def test_broadcast_arrays_exclude(self) -> None: x = DataArray([[1, 2], [3, 4]], coords=[("a", [-1, -2]), ("b", [3, 4])]) y = DataArray([1, 2], coords=[("a", [-1, 20])]) z = DataArray(5, coords={"b": 5}) @@ -2947,7 +2965,7 @@ def test_broadcast_arrays_exclude(self): assert_identical(expected_y2, y2) assert_identical(expected_z2, z2) - def test_broadcast_coordinates(self): + def test_broadcast_coordinates(self) -> None: # regression test for GH649 ds = Dataset({"a": (["x", "y"], np.ones((5, 6)))}) x_bc, y_bc, a_bc = broadcast(ds.x, ds.y, ds.a) @@ -2959,7 +2977,7 @@ def test_broadcast_coordinates(self): assert_identical(exp_x, x_bc) assert_identical(exp_y, y_bc) - def test_to_pandas(self): + def test_to_pandas(self) -> None: # 0d actual = DataArray(42).to_pandas() expected = np.array(42) @@ -2991,10 +3009,10 @@ def test_to_pandas(self): roundtripped = DataArray(da.to_pandas()).drop_vars(dims) assert_identical(da, roundtripped) - with pytest.raises(ValueError, match=r"cannot convert"): + with pytest.raises(ValueError, match=r"Cannot convert"): DataArray(np.random.randn(1, 2, 3, 4, 5)).to_pandas() - def test_to_dataframe(self): + def test_to_dataframe(self) -> None: # regression test for #260 arr_np = np.random.randn(3, 4) @@ -3028,7 +3046,7 @@ def test_to_dataframe(self): with pytest.raises(ValueError, match=r"unnamed"): arr.to_dataframe() - def test_to_dataframe_multiindex(self): + def test_to_dataframe_multiindex(self) -> None: # regression test for #3008 arr_np = np.random.randn(4, 3) @@ -3043,7 +3061,7 @@ def test_to_dataframe_multiindex(self): assert_array_equal(actual.index.levels[1], ["a", "b"]) assert_array_equal(actual.index.levels[2], [5, 6, 7]) - def test_to_dataframe_0length(self): + def test_to_dataframe_0length(self) -> None: # regression test for #3008 arr_np = np.random.randn(4, 0) @@ -3055,7 +3073,7 @@ def test_to_dataframe_0length(self): assert len(actual) == 0 assert_array_equal(actual.index.names, list("ABC")) - def test_to_pandas_name_matches_coordinate(self): + def test_to_pandas_name_matches_coordinate(self) -> None: # coordinate with same name as array arr = DataArray([1, 2, 3], dims="x", name="x") series = arr.to_series() @@ -3068,7 +3086,7 @@ def test_to_pandas_name_matches_coordinate(self): expected = series.to_frame() assert expected.equals(frame) - def test_to_and_from_series(self): + def test_to_and_from_series(self) -> None: expected = self.dv.to_dataframe()["foo"] actual = self.dv.to_series() assert_array_equal(expected.values, actual.values) @@ -3083,7 +3101,7 @@ def test_to_and_from_series(self): expected_da, DataArray.from_series(actual).drop_vars(["x", "y"]) ) - def test_from_series_multiindex(self): + def test_from_series_multiindex(self) -> None: # GH:3951 df = pd.DataFrame({"B": [1, 2, 3], "A": [4, 5, 6]}) df = df.rename_axis("num").rename_axis("alpha", axis=1) @@ -3092,7 +3110,7 @@ def test_from_series_multiindex(self): assert (actual.sel(alpha="A") == [4, 5, 6]).all() @requires_sparse - def test_from_series_sparse(self): + def test_from_series_sparse(self) -> None: import sparse series = pd.Series([1, 2], index=[("a", 1), ("b", 2)]) @@ -3105,7 +3123,7 @@ def test_from_series_sparse(self): assert_identical(actual_sparse, actual_dense) @requires_sparse - def test_from_multiindex_series_sparse(self): + def test_from_multiindex_series_sparse(self) -> None: # regression test for GH4019 import sparse @@ -3122,7 +3140,7 @@ def test_from_multiindex_series_sparse(self): np.testing.assert_equal(actual_coords, 
expected_coords) - def test_to_and_from_empty_series(self): + def test_to_and_from_empty_series(self) -> None: # GH697 expected = pd.Series([], dtype=np.float64) da = DataArray.from_series(expected) @@ -3131,7 +3149,7 @@ def test_to_and_from_empty_series(self): assert len(actual) == 0 assert expected.equals(actual) - def test_series_categorical_index(self): + def test_series_categorical_index(self) -> None: # regression test for GH700 if not hasattr(pd, "CategoricalIndex"): pytest.skip("requires pandas with CategoricalIndex") @@ -3205,7 +3223,7 @@ def test_to_and_from_dict(self, encoding) -> None: actual_no_data = array.to_dict(data=False, encoding=encoding) assert expected_no_data == actual_no_data - def test_to_and_from_dict_with_time_dim(self): + def test_to_and_from_dict_with_time_dim(self) -> None: x = np.random.randn(10, 3) t = pd.date_range("20130101", periods=10) lat = [77.7, 83.2, 76] @@ -3213,7 +3231,7 @@ def test_to_and_from_dict_with_time_dim(self): roundtripped = DataArray.from_dict(da.to_dict()) assert_identical(da, roundtripped) - def test_to_and_from_dict_with_nan_nat(self): + def test_to_and_from_dict_with_nan_nat(self) -> None: y = np.random.randn(10, 3) y[2] = np.nan t = pd.Series(pd.date_range("20130101", periods=10)) @@ -3223,7 +3241,7 @@ def test_to_and_from_dict_with_nan_nat(self): roundtripped = DataArray.from_dict(da.to_dict()) assert_identical(da, roundtripped) - def test_to_dict_with_numpy_attrs(self): + def test_to_dict_with_numpy_attrs(self) -> None: # this doesn't need to roundtrip x = np.random.randn(10, 3) t = list("abcdefghij") @@ -3235,8 +3253,8 @@ def test_to_dict_with_numpy_attrs(self): } da = DataArray(x, {"t": t, "lat": lat}, dims=["t", "lat"], attrs=attrs) expected_attrs = { - "created": attrs["created"].item(), - "coords": attrs["coords"].tolist(), + "created": attrs["created"].item(), # type: ignore[attr-defined] + "coords": attrs["coords"].tolist(), # type: ignore[attr-defined] "maintainer": "bar", } actual = da.to_dict() @@ -3244,7 +3262,7 @@ def test_to_dict_with_numpy_attrs(self): # check that they are identical assert expected_attrs == actual["attrs"] - def test_to_masked_array(self): + def test_to_masked_array(self) -> None: rs = np.random.RandomState(44) x = rs.random_sample(size=(10, 20)) x_masked = np.ma.masked_where(x < 0.5, x) @@ -3286,7 +3304,7 @@ def test_to_masked_array(self): ma = da.to_masked_array() assert len(ma.mask) == N - def test_to_and_from_cdms2_classic(self): + def test_to_and_from_cdms2_classic(self) -> None: """Classic with 1D axes""" pytest.importorskip("cdms2") @@ -3325,7 +3343,7 @@ def test_to_and_from_cdms2_classic(self): for coord_name in original.coords.keys(): assert_array_equal(original.coords[coord_name], back.coords[coord_name]) - def test_to_and_from_cdms2_sgrid(self): + def test_to_and_from_cdms2_sgrid(self) -> None: """Curvilinear (structured) grid The rectangular grid case is covered by the classic case @@ -3354,7 +3372,7 @@ def test_to_and_from_cdms2_sgrid(self): assert_array_equal(original.coords["lat"], back.coords["lat"]) assert_array_equal(original.coords["lon"], back.coords["lon"]) - def test_to_and_from_cdms2_ugrid(self): + def test_to_and_from_cdms2_ugrid(self) -> None: """Unstructured grid""" pytest.importorskip("cdms2") @@ -3375,7 +3393,7 @@ def test_to_and_from_cdms2_ugrid(self): assert_array_equal(original.coords["lat"], back.coords["lat"]) assert_array_equal(original.coords["lon"], back.coords["lon"]) - def test_to_dataset_whole(self): + def test_to_dataset_whole(self) -> None: unnamed = 
DataArray([1, 2], dims="x") with pytest.raises(ValueError, match=r"unable to convert unnamed"): unnamed.to_dataset() @@ -3399,7 +3417,7 @@ def test_to_dataset_whole(self): with pytest.raises(TypeError): actual = named.to_dataset("bar") - def test_to_dataset_split(self): + def test_to_dataset_split(self) -> None: array = DataArray([1, 2, 3], coords=[("x", list("abc"))], attrs={"a": 1}) expected = Dataset({"a": 1, "b": 2, "c": 3}, attrs={"a": 1}) actual = array.to_dataset("x") @@ -3416,7 +3434,7 @@ def test_to_dataset_split(self): actual = array.to_dataset("x") assert_identical(expected, actual) - def test_to_dataset_retains_keys(self): + def test_to_dataset_retains_keys(self) -> None: # use dates as convenient non-str objects. Not a specific date test import datetime @@ -3430,7 +3448,7 @@ def test_to_dataset_retains_keys(self): assert_equal(array, result) - def test__title_for_slice(self): + def test__title_for_slice(self) -> None: array = DataArray( np.ones((4, 3, 2)), dims=["a", "b", "c"], @@ -3444,7 +3462,7 @@ def test__title_for_slice(self): a2 = DataArray(np.ones((4, 1)), dims=["a", "b"]) assert "" == a2._title_for_slice() - def test__title_for_slice_truncate(self): + def test__title_for_slice_truncate(self) -> None: array = DataArray(np.ones(4)) array.coords["a"] = "a" * 100 array.coords["b"] = "b" * 100 @@ -3455,13 +3473,13 @@ def test__title_for_slice_truncate(self): assert nchar == len(title) assert title.endswith("...") - def test_dataarray_diff_n1(self): + def test_dataarray_diff_n1(self) -> None: da = DataArray(np.random.randn(3, 4), dims=["x", "y"]) actual = da.diff("y") expected = DataArray(np.diff(da.values, axis=1), dims=["x", "y"]) assert_equal(expected, actual) - def test_coordinate_diff(self): + def test_coordinate_diff(self) -> None: # regression test for GH634 arr = DataArray(range(0, 20, 2), dims=["lon"], coords=[range(10)]) lon = arr.coords["lon"] @@ -3471,7 +3489,7 @@ def test_coordinate_diff(self): @pytest.mark.parametrize("offset", [-5, 0, 1, 2]) @pytest.mark.parametrize("fill_value, dtype", [(2, int), (dtypes.NA, float)]) - def test_shift(self, offset, fill_value, dtype): + def test_shift(self, offset, fill_value, dtype) -> None: arr = DataArray([1, 2, 3], dims="x") actual = arr.shift(x=1, fill_value=fill_value) if fill_value == dtypes.NA: @@ -3487,19 +3505,19 @@ def test_shift(self, offset, fill_value, dtype): actual = arr.shift(x=offset) assert_identical(expected, actual) - def test_roll_coords(self): + def test_roll_coords(self) -> None: arr = DataArray([1, 2, 3], coords={"x": range(3)}, dims="x") actual = arr.roll(x=1, roll_coords=True) expected = DataArray([3, 1, 2], coords=[("x", [2, 0, 1])]) assert_identical(expected, actual) - def test_roll_no_coords(self): + def test_roll_no_coords(self) -> None: arr = DataArray([1, 2, 3], coords={"x": range(3)}, dims="x") actual = arr.roll(x=1) expected = DataArray([3, 1, 2], coords=[("x", [0, 1, 2])]) assert_identical(expected, actual) - def test_copy_with_data(self): + def test_copy_with_data(self) -> None: orig = DataArray( np.random.random(size=(2, 2)), dims=("x", "y"), @@ -3535,7 +3553,7 @@ def test_copy_with_data(self): ], ], ) - def test_copy_coords(self, deep, expected_orig): + def test_copy_coords(self, deep, expected_orig) -> None: """The test fails for the shallow copy, and apparently only on Windows for some reason. 
In windows coords seem to be immutable unless it's one dataarray deep copied from another.""" @@ -3556,12 +3574,12 @@ def test_copy_coords(self, deep, expected_orig): assert_identical(da["a"], expected_orig) - def test_real_and_imag(self): + def test_real_and_imag(self) -> None: array = DataArray(1 + 2j) assert_identical(array.real, DataArray(1)) assert_identical(array.imag, DataArray(2)) - def test_setattr_raises(self): + def test_setattr_raises(self) -> None: array = DataArray(0, coords={"scalar": 1}, attrs={"foo": "bar"}) with pytest.raises(AttributeError, match=r"cannot set attr"): array.scalar = 2 @@ -3594,53 +3612,53 @@ def test_full_like(self) -> None: with pytest.raises(ValueError, match="'dtype' cannot be dict-like"): full_like(da, fill_value=True, dtype={"x": bool}) - def test_dot(self): + def test_dot(self) -> None: x = np.linspace(-3, 3, 6) y = np.linspace(-3, 3, 5) z = range(4) da_vals = np.arange(6 * 5 * 4).reshape((6, 5, 4)) da = DataArray(da_vals, coords=[x, y, z], dims=["x", "y", "z"]) - dm_vals = range(4) - dm = DataArray(dm_vals, coords=[z], dims=["z"]) + dm_vals1 = range(4) + dm1 = DataArray(dm_vals1, coords=[z], dims=["z"]) # nd dot 1d - actual = da.dot(dm) - expected_vals = np.tensordot(da_vals, dm_vals, [2, 0]) - expected = DataArray(expected_vals, coords=[x, y], dims=["x", "y"]) - assert_equal(expected, actual) + actual1 = da.dot(dm1) + expected_vals1 = np.tensordot(da_vals, dm_vals1, (2, 0)) + expected1 = DataArray(expected_vals1, coords=[x, y], dims=["x", "y"]) + assert_equal(expected1, actual1) # all shared dims - actual = da.dot(da) - expected_vals = np.tensordot(da_vals, da_vals, axes=([0, 1, 2], [0, 1, 2])) - expected = DataArray(expected_vals) - assert_equal(expected, actual) + actual2 = da.dot(da) + expected_vals2 = np.tensordot(da_vals, da_vals, axes=([0, 1, 2], [0, 1, 2])) + expected2 = DataArray(expected_vals2) + assert_equal(expected2, actual2) # multiple shared dims - dm_vals = np.arange(20 * 5 * 4).reshape((20, 5, 4)) + dm_vals3 = np.arange(20 * 5 * 4).reshape((20, 5, 4)) j = np.linspace(-3, 3, 20) - dm = DataArray(dm_vals, coords=[j, y, z], dims=["j", "y", "z"]) - actual = da.dot(dm) - expected_vals = np.tensordot(da_vals, dm_vals, axes=([1, 2], [1, 2])) - expected = DataArray(expected_vals, coords=[x, j], dims=["x", "j"]) - assert_equal(expected, actual) + dm3 = DataArray(dm_vals3, coords=[j, y, z], dims=["j", "y", "z"]) + actual3 = da.dot(dm3) + expected_vals3 = np.tensordot(da_vals, dm_vals3, axes=([1, 2], [1, 2])) + expected3 = DataArray(expected_vals3, coords=[x, j], dims=["x", "j"]) + assert_equal(expected3, actual3) # Ellipsis: all dims are shared - actual = da.dot(da, dims=...) - expected = da.dot(da) - assert_equal(expected, actual) + actual4 = da.dot(da, dims=...) + expected4 = da.dot(da) + assert_equal(expected4, actual4) # Ellipsis: not all dims are shared - actual = da.dot(dm, dims=...) - expected = da.dot(dm, dims=("j", "x", "y", "z")) - assert_equal(expected, actual) + actual5 = da.dot(dm3, dims=...) 
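+        # Note on the numbered names (actual1..actual5, expected1..expected5)
+        # used throughout this file: mypy fixes a variable's type at its
+        # first assignment, so reusing a single `actual`/`expected` for
+        # results of different types would defeat the new annotations;
+        # fresh names let each result keep its own precisely inferred type.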
+ expected5 = da.dot(dm3, dims=("j", "x", "y", "z")) + assert_equal(expected5, actual5) with pytest.raises(NotImplementedError): - da.dot(dm.to_dataset(name="dm")) + da.dot(dm3.to_dataset(name="dm")) # type: ignore with pytest.raises(TypeError): - da.dot(dm.values) + da.dot(dm3.values) # type: ignore - def test_dot_align_coords(self): + def test_dot_align_coords(self) -> None: # GH 3694 x = np.linspace(-3, 3, 6) @@ -3650,36 +3668,36 @@ def test_dot_align_coords(self): da = DataArray(da_vals, coords=[x, y, z_a], dims=["x", "y", "z"]) z_m = range(2, 6) - dm_vals = range(4) - dm = DataArray(dm_vals, coords=[z_m], dims=["z"]) + dm_vals1 = range(4) + dm1 = DataArray(dm_vals1, coords=[z_m], dims=["z"]) with xr.set_options(arithmetic_join="exact"): with pytest.raises( ValueError, match=r"cannot align.*join.*exact.*not equal.*" ): - da.dot(dm) + da.dot(dm1) - da_aligned, dm_aligned = xr.align(da, dm, join="inner") + da_aligned, dm_aligned = xr.align(da, dm1, join="inner") # nd dot 1d - actual = da.dot(dm) - expected_vals = np.tensordot(da_aligned.values, dm_aligned.values, [2, 0]) - expected = DataArray(expected_vals, coords=[x, da_aligned.y], dims=["x", "y"]) - assert_equal(expected, actual) + actual1 = da.dot(dm1) + expected_vals1 = np.tensordot(da_aligned.values, dm_aligned.values, (2, 0)) + expected1 = DataArray(expected_vals1, coords=[x, da_aligned.y], dims=["x", "y"]) + assert_equal(expected1, actual1) # multiple shared dims - dm_vals = np.arange(20 * 5 * 4).reshape((20, 5, 4)) + dm_vals2 = np.arange(20 * 5 * 4).reshape((20, 5, 4)) j = np.linspace(-3, 3, 20) - dm = DataArray(dm_vals, coords=[j, y, z_m], dims=["j", "y", "z"]) - da_aligned, dm_aligned = xr.align(da, dm, join="inner") - actual = da.dot(dm) - expected_vals = np.tensordot( + dm2 = DataArray(dm_vals2, coords=[j, y, z_m], dims=["j", "y", "z"]) + da_aligned, dm_aligned = xr.align(da, dm2, join="inner") + actual2 = da.dot(dm2) + expected_vals2 = np.tensordot( da_aligned.values, dm_aligned.values, axes=([1, 2], [1, 2]) ) - expected = DataArray(expected_vals, coords=[x, j], dims=["x", "j"]) - assert_equal(expected, actual) + expected2 = DataArray(expected_vals2, coords=[x, j], dims=["x", "j"]) + assert_equal(expected2, actual2) - def test_matmul(self): + def test_matmul(self) -> None: # copied from above (could make a fixture) x = np.linspace(-3, 3, 6) @@ -3692,7 +3710,7 @@ def test_matmul(self): expected = da.dot(da) assert_identical(result, expected) - def test_matmul_align_coords(self): + def test_matmul_align_coords(self) -> None: # GH 3694 x_a = np.arange(6) @@ -3712,7 +3730,7 @@ def test_matmul_align_coords(self): ): da_a @ da_b - def test_binary_op_propagate_indexes(self): + def test_binary_op_propagate_indexes(self) -> None: # regression test for GH2227 self.dv["x"] = np.arange(self.dv.sizes["x"]) expected = self.dv.xindexes["x"] @@ -3723,9 +3741,9 @@ def test_binary_op_propagate_indexes(self): actual = (self.dv > 10).xindexes["x"] assert expected is actual - def test_binary_op_join_setting(self): + def test_binary_op_join_setting(self) -> None: dim = "x" - align_type = "outer" + align_type: Final = "outer" coords_l, coords_r = [0, 1, 2], [1, 2, 3] missing_3 = xr.DataArray(coords_l, [(dim, coords_l)]) missing_0 = xr.DataArray(coords_r, [(dim, coords_r)]) @@ -3737,7 +3755,7 @@ def test_binary_op_join_setting(self): expected = xr.DataArray([np.nan, 2, 4, np.nan], [(dim, [0, 1, 2, 3])]) assert_equal(actual, expected) - def test_combine_first(self): + def test_combine_first(self) -> None: ar0 = DataArray([[0, 0], [0, 0]], [("x", 
["a", "b"]), ("y", [-1, 0])]) ar1 = DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar2 = DataArray([2], [("x", ["d"])]) @@ -3762,7 +3780,7 @@ def test_combine_first(self): ) assert_equal(actual, expected) - def test_sortby(self): + def test_sortby(self) -> None: da = DataArray( [[1, 2], [3, 4], [5, 6]], [("x", ["c", "b", "a"]), ("y", [1, 0])] ) @@ -3805,7 +3823,7 @@ def test_sortby(self): assert_equal(actual, expected) @requires_bottleneck - def test_rank(self): + def test_rank(self) -> None: # floats ar = DataArray([[3, 4, np.nan, 1]]) expect_0 = DataArray([[1, 1, np.nan, 1]]) @@ -3826,7 +3844,7 @@ def test_rank(self): @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize("use_datetime", [True, False]) @pytest.mark.filterwarnings("ignore:overflow encountered in multiply") - def test_polyfit(self, use_dask, use_datetime): + def test_polyfit(self, use_dask, use_datetime) -> None: if use_dask and not has_dask: pytest.skip("requires dask") xcoord = xr.DataArray( @@ -3884,7 +3902,7 @@ def test_polyfit(self, use_dask, use_datetime): out = da.polyfit("x", 8, full=True) np.testing.assert_array_equal(out.polyfit_residuals.isnull(), [True, False]) - def test_pad_constant(self): + def test_pad_constant(self) -> None: ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) actual = ar.pad(dim_0=(1, 3)) expected = DataArray( @@ -3918,7 +3936,7 @@ def test_pad_constant(self): ) assert_identical(actual, expected) - def test_pad_coords(self): + def test_pad_coords(self) -> None: ar = DataArray( np.arange(3 * 4 * 5).reshape(3, 4, 5), [("x", np.arange(3)), ("y", np.arange(4)), ("z", np.arange(5))], @@ -3951,7 +3969,7 @@ def test_pad_coords(self): @pytest.mark.parametrize( "stat_length", (None, 3, (1, 3), {"dim_0": (2, 1), "dim_2": (4, 2)}) ) - def test_pad_stat_length(self, mode, stat_length): + def test_pad_stat_length(self, mode, stat_length) -> None: ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) actual = ar.pad(dim_0=(1, 3), dim_2=(2, 2), mode=mode, stat_length=stat_length) if isinstance(stat_length, dict): @@ -3970,7 +3988,7 @@ def test_pad_stat_length(self, mode, stat_length): @pytest.mark.parametrize( "end_values", (None, 3, (3, 5), {"dim_0": (2, 1), "dim_2": (4, 2)}) ) - def test_pad_linear_ramp(self, end_values): + def test_pad_linear_ramp(self, end_values) -> None: ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) actual = ar.pad( dim_0=(1, 3), dim_2=(2, 2), mode="linear_ramp", end_values=end_values @@ -3992,7 +4010,7 @@ def test_pad_linear_ramp(self, end_values): @pytest.mark.parametrize("mode", ("reflect", "symmetric")) @pytest.mark.parametrize("reflect_type", (None, "even", "odd")) - def test_pad_reflect(self, mode, reflect_type): + def test_pad_reflect(self, mode, reflect_type) -> None: ar = DataArray(np.arange(3 * 4 * 5).reshape(3, 4, 5)) actual = ar.pad( @@ -4018,7 +4036,9 @@ def test_pad_reflect(self, mode, reflect_type): @pytest.mark.parametrize( "backend", ["numpy", pytest.param("dask", marks=[requires_dask])] ) - def test_query(self, backend, engine, parser): + def test_query( + self, backend, engine: QueryEngineOptions, parser: QueryParserOptions + ) -> None: """Test querying a dataset.""" # setup test data @@ -4073,7 +4093,7 @@ def test_query(self, backend, engine, parser): # test error handling with pytest.raises(ValueError): - aa.query("a > 5") # must be dict or kwargs + aa.query("a > 5") # type: ignore # must be dict or kwargs with pytest.raises(ValueError): aa.query(x=(a > 5)) # must be query string with 
pytest.raises(UndefinedVariableError): @@ -4081,7 +4101,7 @@ def test_query(self, backend, engine, parser): @requires_scipy @pytest.mark.parametrize("use_dask", [True, False]) - def test_curvefit(self, use_dask): + def test_curvefit(self, use_dask) -> None: if use_dask and not has_dask: pytest.skip("requires dask") @@ -4115,7 +4135,7 @@ def exp_decay(t, n0, tau=1): assert "a" in fit.param assert "x" not in fit.dims - def test_curvefit_helpers(self): + def test_curvefit_helpers(self) -> None: def exp_decay(t, n0, tau=1): return n0 * np.exp(-t / tau) @@ -4165,7 +4185,7 @@ def setup(self): ], ) class TestReduce1D(TestReduce): - def test_min(self, x, minindex, maxindex, nanindex): + def test_min(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) @@ -4191,7 +4211,7 @@ def test_min(self, x, minindex, maxindex, nanindex): assert_identical(result2, expected2) - def test_max(self, x, minindex, maxindex, nanindex): + def test_max(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) @@ -4220,7 +4240,7 @@ def test_max(self, x, minindex, maxindex, nanindex): @pytest.mark.filterwarnings( "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" ) - def test_argmin(self, x, minindex, maxindex, nanindex): + def test_argmin(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) @@ -4252,7 +4272,7 @@ def test_argmin(self, x, minindex, maxindex, nanindex): @pytest.mark.filterwarnings( "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" ) - def test_argmax(self, x, minindex, maxindex, nanindex): + def test_argmax(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) @@ -4282,7 +4302,7 @@ def test_argmax(self, x, minindex, maxindex, nanindex): assert_identical(result2, expected2) @pytest.mark.parametrize("use_dask", [True, False]) - def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): + def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask) -> None: if use_dask and not has_dask: pytest.skip("requires dask") if use_dask and x.dtype.kind == "M": @@ -4388,7 +4408,7 @@ def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): assert_identical(result7, expected7) @pytest.mark.parametrize("use_dask", [True, False]) - def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): + def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask) -> None: if use_dask and not has_dask: pytest.skip("requires dask") if use_dask and x.dtype.kind == "M": @@ -4496,7 +4516,7 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): @pytest.mark.filterwarnings( "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" ) - def test_argmin_dim(self, x, minindex, maxindex, nanindex): + def test_argmin_dim(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) @@ -4532,7 +4552,7 @@ def test_argmin_dim(self, x, minindex, maxindex, nanindex): @pytest.mark.filterwarnings( "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" ) - def test_argmax_dim(self, x, minindex, maxindex, nanindex): + def test_argmax_dim(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["x"], 
coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) @@ -4621,7 +4641,7 @@ def test_argmax_dim(self, x, minindex, maxindex, nanindex): ], ) class TestReduce2D(TestReduce): - def test_min(self, x, minindex, maxindex, nanindex): + def test_min(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["y", "x"], @@ -4630,10 +4650,10 @@ def test_min(self, x, minindex, maxindex, nanindex): ) minindex = [x if not np.isnan(x) else 0 for x in minindex] - expected0 = [ + expected0list = [ ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) ] - expected0 = xr.concat(expected0, dim="y") + expected0 = xr.concat(expected0list, dim="y") result0 = ar.min(dim="x", keep_attrs=True) assert_identical(result0, expected0) @@ -4650,17 +4670,17 @@ def test_min(self, x, minindex, maxindex, nanindex): x if y is None or ar.dtype.kind == "O" else y for x, y in zip(minindex, nanindex) ] - expected2 = [ + expected2list = [ ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) ] - expected2 = xr.concat(expected2, dim="y") + expected2 = xr.concat(expected2list, dim="y") expected2.attrs = {} result3 = ar.min(dim="x", skipna=False) assert_identical(result3, expected2) - def test_max(self, x, minindex, maxindex, nanindex): + def test_max(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["y", "x"], @@ -4669,10 +4689,10 @@ def test_max(self, x, minindex, maxindex, nanindex): ) maxindex = [x if not np.isnan(x) else 0 for x in maxindex] - expected0 = [ + expected0list = [ ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) ] - expected0 = xr.concat(expected0, dim="y") + expected0 = xr.concat(expected0list, dim="y") result0 = ar.max(dim="x", keep_attrs=True) assert_identical(result0, expected0) @@ -4689,36 +4709,36 @@ def test_max(self, x, minindex, maxindex, nanindex): x if y is None or ar.dtype.kind == "O" else y for x, y in zip(maxindex, nanindex) ] - expected2 = [ + expected2list = [ ar.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) ] - expected2 = xr.concat(expected2, dim="y") + expected2 = xr.concat(expected2list, dim="y") expected2.attrs = {} result3 = ar.max(dim="x", skipna=False) assert_identical(result3, expected2) - def test_argmin(self, x, minindex, maxindex, nanindex): + def test_argmin(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["y", "x"], coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, attrs=self.attrs, ) - indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) - indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords) + indarrnp = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) + indarr = xr.DataArray(indarrnp, dims=ar.dims, coords=ar.coords) if np.isnan(minindex).any(): with pytest.raises(ValueError): ar.argmin(dim="x") return - expected0 = [ + expected0list = [ indarr.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) ] - expected0 = xr.concat(expected0, dim="y") + expected0 = xr.concat(expected0list, dim="y") result0 = ar.argmin(dim="x") assert_identical(result0, expected0) @@ -4735,37 +4755,37 @@ def test_argmin(self, x, minindex, maxindex, nanindex): x if y is None or ar.dtype.kind == "O" else y for x, y in zip(minindex, nanindex) ] - expected2 = [ + expected2list = [ indarr.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) ] - expected2 = xr.concat(expected2, dim="y") + expected2 = xr.concat(expected2list, dim="y") expected2.attrs = {} 
result3 = ar.argmin(dim="x", skipna=False) assert_identical(result3, expected2) - def test_argmax(self, x, minindex, maxindex, nanindex): + def test_argmax(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["y", "x"], coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, attrs=self.attrs, ) - indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) - indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords) + indarr_np = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) + indarr = xr.DataArray(indarr_np, dims=ar.dims, coords=ar.coords) if np.isnan(maxindex).any(): with pytest.raises(ValueError): ar.argmax(dim="x") return - expected0 = [ + expected0list = [ indarr.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) ] - expected0 = xr.concat(expected0, dim="y") + expected0 = xr.concat(expected0list, dim="y") result0 = ar.argmax(dim="x") assert_identical(result0, expected0) @@ -4782,11 +4802,11 @@ def test_argmax(self, x, minindex, maxindex, nanindex): x if y is None or ar.dtype.kind == "O" else y for x, y in zip(maxindex, nanindex) ] - expected2 = [ + expected2list = [ indarr.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) ] - expected2 = xr.concat(expected2, dim="y") + expected2 = xr.concat(expected2list, dim="y") expected2.attrs = {} result3 = ar.argmax(dim="x", skipna=False) @@ -4794,7 +4814,7 @@ def test_argmax(self, x, minindex, maxindex, nanindex): assert_identical(result3, expected2) @pytest.mark.parametrize("use_dask", [True, False]) - def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): + def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask) -> None: if use_dask and not has_dask: pytest.skip("requires dask") if use_dask and x.dtype.kind == "M": @@ -4840,11 +4860,11 @@ def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): minindex0 = [x if not np.isnan(x) else 0 for x in minindex] nan_mult_0 = np.array([np.NaN if x else 1 for x in hasna])[:, None] - expected0 = [ + expected0list = [ (coordarr1 * nan_mult_0).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex0) ] - expected0 = xr.concat(expected0, dim="y") + expected0 = xr.concat(expected0list, dim="y") expected0.name = "x" # Default fill value (NaN) @@ -4869,11 +4889,11 @@ def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): x if y is None or ar0.dtype.kind == "O" else y for x, y in zip(minindex0, nanindex) ] - expected3 = [ + expected3list = [ coordarr0.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex3) ] - expected3 = xr.concat(expected3, dim="y") + expected3 = xr.concat(expected3list, dim="y") expected3.name = "x" expected3.attrs = {} @@ -4888,11 +4908,11 @@ def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): # Float fill_value nan_mult_5 = np.array([-1.1 if x else 1 for x in hasna])[:, None] - expected5 = [ + expected5list = [ (coordarr1 * nan_mult_5).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex0) ] - expected5 = xr.concat(expected5, dim="y") + expected5 = xr.concat(expected5list, dim="y") expected5.name = "x" with raise_if_dask_computes(max_computes=max_computes): @@ -4901,11 +4921,11 @@ def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): # Integer fill_value nan_mult_6 = np.array([-1 if x else 1 for x in hasna])[:, None] - expected6 = [ + expected6list = [ (coordarr1 * nan_mult_6).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex0) ] - expected6 = 
xr.concat(expected6, dim="y") + expected6 = xr.concat(expected6list, dim="y") expected6.name = "x" with raise_if_dask_computes(max_computes=max_computes): @@ -4914,11 +4934,11 @@ def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): # Complex fill_value nan_mult_7 = np.array([-5j if x else 1 for x in hasna])[:, None] - expected7 = [ + expected7list = [ (coordarr1 * nan_mult_7).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex0) ] - expected7 = xr.concat(expected7, dim="y") + expected7 = xr.concat(expected7list, dim="y") expected7.name = "x" with raise_if_dask_computes(max_computes=max_computes): @@ -4926,7 +4946,7 @@ def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): assert_identical(result7, expected7) @pytest.mark.parametrize("use_dask", [True, False]) - def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): + def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask) -> None: if use_dask and not has_dask: pytest.skip("requires dask") if use_dask and x.dtype.kind == "M": @@ -4973,11 +4993,11 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): maxindex0 = [x if not np.isnan(x) else 0 for x in maxindex] nan_mult_0 = np.array([np.NaN if x else 1 for x in hasna])[:, None] - expected0 = [ + expected0list = [ (coordarr1 * nan_mult_0).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex0) ] - expected0 = xr.concat(expected0, dim="y") + expected0 = xr.concat(expected0list, dim="y") expected0.name = "x" # Default fill value (NaN) @@ -5002,11 +5022,11 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): x if y is None or ar0.dtype.kind == "O" else y for x, y in zip(maxindex0, nanindex) ] - expected3 = [ + expected3list = [ coordarr0.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex3) ] - expected3 = xr.concat(expected3, dim="y") + expected3 = xr.concat(expected3list, dim="y") expected3.name = "x" expected3.attrs = {} @@ -5021,11 +5041,11 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): # Float fill_value nan_mult_5 = np.array([-1.1 if x else 1 for x in hasna])[:, None] - expected5 = [ + expected5list = [ (coordarr1 * nan_mult_5).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex0) ] - expected5 = xr.concat(expected5, dim="y") + expected5 = xr.concat(expected5list, dim="y") expected5.name = "x" with raise_if_dask_computes(max_computes=max_computes): @@ -5034,11 +5054,11 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): # Integer fill_value nan_mult_6 = np.array([-1 if x else 1 for x in hasna])[:, None] - expected6 = [ + expected6list = [ (coordarr1 * nan_mult_6).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex0) ] - expected6 = xr.concat(expected6, dim="y") + expected6 = xr.concat(expected6list, dim="y") expected6.name = "x" with raise_if_dask_computes(max_computes=max_computes): @@ -5047,11 +5067,11 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): # Complex fill_value nan_mult_7 = np.array([-5j if x else 1 for x in hasna])[:, None] - expected7 = [ + expected7list = [ (coordarr1 * nan_mult_7).isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex0) ] - expected7 = xr.concat(expected7, dim="y") + expected7 = xr.concat(expected7list, dim="y") expected7.name = "x" with raise_if_dask_computes(max_computes=max_computes): @@ -5061,26 +5081,26 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): @pytest.mark.filterwarnings( "ignore:Behaviour of 
argmin/argmax with neither dim nor :DeprecationWarning" ) - def test_argmin_dim(self, x, minindex, maxindex, nanindex): + def test_argmin_dim(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["y", "x"], coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, attrs=self.attrs, ) - indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) - indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords) + indarrnp = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) + indarr = xr.DataArray(indarrnp, dims=ar.dims, coords=ar.coords) if np.isnan(minindex).any(): with pytest.raises(ValueError): ar.argmin(dim="x") return - expected0 = [ + expected0list = [ indarr.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) ] - expected0 = {"x": xr.concat(expected0, dim="y")} + expected0 = {"x": xr.concat(expected0list, dim="y")} result0 = ar.argmin(dim=["x"]) for key in expected0: @@ -5096,11 +5116,11 @@ def test_argmin_dim(self, x, minindex, maxindex, nanindex): x if y is None or ar.dtype.kind == "O" else y for x, y in zip(minindex, nanindex) ] - expected2 = [ + expected2list = [ indarr.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(minindex) ] - expected2 = {"x": xr.concat(expected2, dim="y")} + expected2 = {"x": xr.concat(expected2list, dim="y")} expected2["x"].attrs = {} result2 = ar.argmin(dim=["x"], skipna=False) @@ -5109,7 +5129,8 @@ def test_argmin_dim(self, x, minindex, maxindex, nanindex): assert_identical(result2[key], expected2[key]) result3 = ar.argmin(...) - min_xind = ar.isel(expected0).argmin() + # TODO: remove cast once argmin typing is overloaded + min_xind = cast(DataArray, ar.isel(expected0).argmin()) expected3 = { "y": DataArray(min_xind), "x": DataArray(minindex[min_xind.item()]), @@ -5121,26 +5142,26 @@ def test_argmin_dim(self, x, minindex, maxindex, nanindex): @pytest.mark.filterwarnings( "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning" ) - def test_argmax_dim(self, x, minindex, maxindex, nanindex): + def test_argmax_dim(self, x, minindex, maxindex, nanindex) -> None: ar = xr.DataArray( x, dims=["y", "x"], coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, attrs=self.attrs, ) - indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) - indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords) + indarrnp = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1]) + indarr = xr.DataArray(indarrnp, dims=ar.dims, coords=ar.coords) if np.isnan(maxindex).any(): with pytest.raises(ValueError): ar.argmax(dim="x") return - expected0 = [ + expected0list = [ indarr.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) ] - expected0 = {"x": xr.concat(expected0, dim="y")} + expected0 = {"x": xr.concat(expected0list, dim="y")} result0 = ar.argmax(dim=["x"]) for key in expected0: @@ -5156,11 +5177,11 @@ def test_argmax_dim(self, x, minindex, maxindex, nanindex): x if y is None or ar.dtype.kind == "O" else y for x, y in zip(maxindex, nanindex) ] - expected2 = [ + expected2list = [ indarr.isel(y=yi).isel(x=indi, drop=True) for yi, indi in enumerate(maxindex) ] - expected2 = {"x": xr.concat(expected2, dim="y")} + expected2 = {"x": xr.concat(expected2list, dim="y")} expected2["x"].attrs = {} result2 = ar.argmax(dim=["x"], skipna=False) @@ -5169,7 +5190,8 @@ def test_argmax_dim(self, x, minindex, maxindex, nanindex): assert_identical(result2[key], expected2[key]) result3 = ar.argmax(...) 
- max_xind = ar.isel(expected0).argmax() + # TODO: remove cast once argmax typing is overloaded + max_xind = cast(DataArray, ar.isel(expected0).argmax()) expected3 = { "y": DataArray(max_xind), "x": DataArray(maxindex[max_xind.item()]), @@ -5807,7 +5829,7 @@ def test_argmax_dim( class TestReduceND(TestReduce): @pytest.mark.parametrize("op", ["idxmin", "idxmax"]) @pytest.mark.parametrize("ndim", [3, 5]) - def test_idxminmax_dask(self, op, ndim): + def test_idxminmax_dask(self, op, ndim) -> None: if not has_dask: pytest.skip("requires dask") @@ -5852,7 +5874,7 @@ def da(request, backend): @pytest.mark.parametrize("da", ("repeating_ints",), indirect=True) -def test_isin(da): +def test_isin(da) -> None: expected = DataArray( np.asarray([[0, 0, 0], [1, 0, 0]]), dims=list("yx"), @@ -5872,7 +5894,7 @@ def test_isin(da): @pytest.mark.parametrize("da", (1, 2), indirect=True) -def test_rolling_iter(da): +def test_rolling_iter(da) -> None: rolling_obj = da.rolling(time=7) rolling_obj_mean = rolling_obj.mean() @@ -5896,7 +5918,7 @@ def test_rolling_iter(da): @pytest.mark.parametrize("da", (1,), indirect=True) -def test_rolling_repr(da): +def test_rolling_repr(da) -> None: rolling_obj = da.rolling(time=7) assert repr(rolling_obj) == "DataArrayRolling [time->7]" rolling_obj = da.rolling(time=7, center=True) @@ -5906,7 +5928,7 @@ def test_rolling_repr(da): @requires_dask -def test_repeated_rolling_rechunks(): +def test_repeated_rolling_rechunks() -> None: # regression test for GH3277, GH2514 dat = DataArray(np.random.rand(7653, 300), dims=("day", "item")) @@ -5914,14 +5936,14 @@ def test_repeated_rolling_rechunks(): dat_chunk.rolling(day=10).mean().rolling(day=250).std() -def test_rolling_doc(da): +def test_rolling_doc(da) -> None: rolling_obj = da.rolling(time=7) # argument substitution worked assert "`mean`" in rolling_obj.mean.__doc__ -def test_rolling_properties(da): +def test_rolling_properties(da) -> None: rolling_obj = da.rolling(time=4) assert rolling_obj.obj.get_axis_num("time") == 1 @@ -5938,7 +5960,7 @@ def test_rolling_properties(da): @pytest.mark.parametrize("center", (True, False, None)) @pytest.mark.parametrize("min_periods", (1, None)) @pytest.mark.parametrize("backend", ["numpy"], indirect=True) -def test_rolling_wrapped_bottleneck(da, name, center, min_periods): +def test_rolling_wrapped_bottleneck(da, name, center, min_periods) -> None: bn = pytest.importorskip("bottleneck", minversion="1.1") # Test all bottleneck functions @@ -5966,7 +5988,7 @@ def test_rolling_wrapped_bottleneck(da, name, center, min_periods): @pytest.mark.parametrize("min_periods", (1, None)) @pytest.mark.parametrize("window", (7, 8)) @pytest.mark.parametrize("backend", ["dask"], indirect=True) -def test_rolling_wrapped_dask(da, name, center, min_periods, window): +def test_rolling_wrapped_dask(da, name, center, min_periods, window) -> None: # dask version rolling_obj = da.rolling(time=window, min_periods=min_periods, center=center) actual = getattr(rolling_obj, name)().load() @@ -5990,7 +6012,7 @@ def test_rolling_wrapped_dask(da, name, center, min_periods, window): @pytest.mark.parametrize("center", (True, None)) -def test_rolling_wrapped_dask_nochunk(center): +def test_rolling_wrapped_dask_nochunk(center) -> None: # GH:2113 pytest.importorskip("dask.array") @@ -6005,7 +6027,7 @@ def test_rolling_wrapped_dask_nochunk(center): @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) -def 
test_rolling_pandas_compat(center, window, min_periods): +def test_rolling_pandas_compat(center, window, min_periods) -> None: s = pd.Series(np.arange(10)) da = DataArray.from_series(s) @@ -6026,7 +6048,7 @@ def test_rolling_pandas_compat(center, window, min_periods): @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) -def test_rolling_construct(center, window): +def test_rolling_construct(center, window) -> None: s = pd.Series(np.arange(10)) da = DataArray.from_series(s) @@ -6055,7 +6077,7 @@ def test_rolling_construct(center, window): @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "mean", "std", "max")) -def test_rolling_reduce(da, center, min_periods, window, name): +def test_rolling_reduce(da, center, min_periods, window, name) -> None: if min_periods is not None and window < min_periods: min_periods = window @@ -6076,7 +6098,7 @@ def test_rolling_reduce(da, center, min_periods, window, name): @pytest.mark.parametrize("min_periods", (None, 1, 2, 3)) @pytest.mark.parametrize("window", (1, 2, 3, 4)) @pytest.mark.parametrize("name", ("sum", "max")) -def test_rolling_reduce_nonnumeric(center, min_periods, window, name): +def test_rolling_reduce_nonnumeric(center, min_periods, window, name) -> None: da = DataArray( [0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time" ).isnull() @@ -6093,10 +6115,10 @@ def test_rolling_reduce_nonnumeric(center, min_periods, window, name): assert actual.dims == expected.dims -def test_rolling_count_correct(): +def test_rolling_count_correct() -> None: da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time") - kwargs = [ + kwargs: list[dict[str, Any]] = [ {"time": 11, "min_periods": 1}, {"time": 11, "min_periods": None}, {"time": 7, "min_periods": 2}, @@ -6134,7 +6156,7 @@ def test_rolling_count_correct(): @pytest.mark.parametrize("center", (True, False)) @pytest.mark.parametrize("min_periods", (None, 1)) @pytest.mark.parametrize("name", ("sum", "mean", "max")) -def test_ndrolling_reduce(da, center, min_periods, name): +def test_ndrolling_reduce(da, center, min_periods, name) -> None: rolling_obj = da.rolling(time=3, x=2, center=center, min_periods=min_periods) actual = getattr(rolling_obj, name)() @@ -6161,7 +6183,7 @@ def test_ndrolling_reduce(da, center, min_periods, name): @pytest.mark.parametrize("center", (True, False, (True, False))) @pytest.mark.parametrize("fill_value", (np.nan, 0.0)) -def test_ndrolling_construct(center, fill_value): +def test_ndrolling_construct(center, fill_value) -> None: da = DataArray( np.arange(5 * 6 * 7).reshape(5, 6, 7).astype(float), dims=["x", "y", "z"], @@ -6190,7 +6212,7 @@ def test_ndrolling_construct(center, fill_value): ("count", ()), ], ) -def test_rolling_keep_attrs(funcname, argument): +def test_rolling_keep_attrs(funcname, argument) -> None: attrs_da = {"da_attr": "test"} data = np.linspace(10, 15, 100) @@ -6233,17 +6255,17 @@ def test_rolling_keep_attrs(funcname, argument): assert result.name == "name" -def test_raise_no_warning_for_nan_in_binary_ops(): +def test_raise_no_warning_for_nan_in_binary_ops() -> None: with assert_no_warnings(): xr.DataArray([1, 2, np.NaN]) > 0 @pytest.mark.filterwarnings("error") -def test_no_warning_for_all_nan(): +def test_no_warning_for_all_nan() -> None: _ = xr.DataArray([np.NaN, np.NaN]).mean() -def test_name_in_masking(): +def test_name_in_masking() -> None: name = "RingoStarr" da = xr.DataArray(range(10), 
coords=[("x", range(10))], name=name) assert da.where(da > 5).name == name @@ -6254,12 +6276,12 @@ def test_name_in_masking(): class TestIrisConversion: @requires_iris - def test_to_and_from_iris(self): + def test_to_and_from_iris(self) -> None: import cf_units # iris requirement import iris # to iris - coord_dict = {} + coord_dict: dict[Hashable, Any] = {} coord_dict["distance"] = ("distance", [-2, 2], {"units": "meters"}) coord_dict["time"] = ("time", pd.date_range("2000-01-01", periods=3)) coord_dict["height"] = 10 @@ -6325,12 +6347,12 @@ def test_to_and_from_iris(self): @requires_iris @requires_dask - def test_to_and_from_iris_dask(self): + def test_to_and_from_iris_dask(self) -> None: import cf_units # iris requirement import dask.array as da import iris - coord_dict = {} + coord_dict: dict[Hashable, Any] = {} coord_dict["distance"] = ("distance", [-2, 2], {"units": "meters"}) coord_dict["time"] = ("time", pd.date_range("2000-01-01", periods=3)) coord_dict["height"] = 10 @@ -6427,15 +6449,14 @@ def test_to_and_from_iris_dask(self): (None, None, None, None, {}), ], ) - def test_da_name_from_cube(self, std_name, long_name, var_name, name, attrs): + def test_da_name_from_cube( + self, std_name, long_name, var_name, name, attrs + ) -> None: from iris.cube import Cube - data = [] - cube = Cube( - data, var_name=var_name, standard_name=std_name, long_name=long_name - ) + cube = Cube([], var_name=var_name, standard_name=std_name, long_name=long_name) result = xr.DataArray.from_iris(cube) - expected = xr.DataArray(data, name=name, attrs=attrs) + expected = xr.DataArray([], name=name, attrs=attrs) xr.testing.assert_identical(result, expected) @requires_iris @@ -6460,7 +6481,9 @@ def test_da_name_from_cube(self, std_name, long_name, var_name, name, attrs): (None, None, None, "unknown", {}), ], ) - def test_da_coord_name_from_cube(self, std_name, long_name, var_name, name, attrs): + def test_da_coord_name_from_cube( + self, std_name, long_name, var_name, name, attrs + ) -> None: from iris.coords import DimCoord from iris.cube import Cube @@ -6474,7 +6497,7 @@ def test_da_coord_name_from_cube(self, std_name, long_name, var_name, name, attr xr.testing.assert_identical(result, expected) @requires_iris - def test_prevent_duplicate_coord_names(self): + def test_prevent_duplicate_coord_names(self) -> None: from iris.coords import DimCoord from iris.cube import Cube @@ -6496,7 +6519,7 @@ def test_prevent_duplicate_coord_names(self): "coord_values", [["IA", "IL", "IN"], [0, 2, 1]], # non-numeric values # non-monotonic values ) - def test_fallback_to_iris_AuxCoord(self, coord_values): + def test_fallback_to_iris_AuxCoord(self, coord_values) -> None: from iris.coords import AuxCoord from iris.cube import Cube @@ -6516,7 +6539,7 @@ def test_fallback_to_iris_AuxCoord(self, coord_values): ) @pytest.mark.parametrize("backend", ["numpy"], indirect=True) @pytest.mark.parametrize("func", ["mean", "sum"]) -def test_rolling_exp_runs(da, dim, window_type, window, func): +def test_rolling_exp_runs(da, dim, window_type, window, func) -> None: import numbagg if ( @@ -6538,7 +6561,7 @@ def test_rolling_exp_runs(da, dim, window_type, window, func): "window_type, window", [["span", 5], ["alpha", 0.5], ["com", 0.5], ["halflife", 5]] ) @pytest.mark.parametrize("backend", ["numpy"], indirect=True) -def test_rolling_exp_mean_pandas(da, dim, window_type, window): +def test_rolling_exp_mean_pandas(da, dim, window_type, window) -> None: da = da.isel(a=0).where(lambda x: x > 0.2) result = da.rolling_exp(window_type=window_type, 
**{dim: window}).mean() @@ -6558,7 +6581,7 @@ def test_rolling_exp_mean_pandas(da, dim, window_type, window): @requires_numbagg @pytest.mark.parametrize("backend", ["numpy"], indirect=True) @pytest.mark.parametrize("func", ["mean", "sum"]) -def test_rolling_exp_keep_attrs(da, func): +def test_rolling_exp_keep_attrs(da, func) -> None: import numbagg if ( @@ -6601,13 +6624,13 @@ def test_rolling_exp_keep_attrs(da, func): da.rolling_exp(time=10, keep_attrs=True) -def test_no_dict(): +def test_no_dict() -> None: d = DataArray() with pytest.raises(AttributeError): d.__dict__ -def test_subclass_slots(): +def test_subclass_slots() -> None: """Test that DataArray subclasses must explicitly define ``__slots__``. .. note:: @@ -6622,7 +6645,7 @@ class MyArray(DataArray): assert str(e.value) == "MyArray must explicitly define __slots__" -def test_weakref(): +def test_weakref() -> None: """Classes with __slots__ are incompatible with the weakref module unless they explicitly state __weakref__ among their slots """ @@ -6633,7 +6656,7 @@ def test_weakref(): assert r() is a -def test_delete_coords(): +def test_delete_coords() -> None: """Make sure that deleting a coordinate doesn't corrupt the DataArray. See issue #3899. @@ -6658,13 +6681,13 @@ def test_delete_coords(): assert set(a1.coords.keys()) == {"x"} -def test_deepcopy_obj_array(): +def test_deepcopy_obj_array() -> None: x0 = DataArray(np.array([object()])) x1 = deepcopy(x0) assert x0.values[0] is not x1.values[0] -def test_clip(da): +def test_clip(da) -> None: with raise_if_dask_computes(): result = da.clip(min=0.5) assert result.min(...) >= 0.5 @@ -6698,7 +6721,7 @@ def test_clip(da): class TestDropDuplicates: @pytest.mark.parametrize("keep", ["first", "last", False]) - def test_drop_duplicates_1d(self, keep): + def test_drop_duplicates_1d(self, keep) -> None: da = xr.DataArray( [0, 5, 6, 7], dims="time", coords={"time": [0, 0, 1, 2]}, name="test" ) @@ -6720,7 +6743,7 @@ def test_drop_duplicates_1d(self, keep): with pytest.raises(ValueError, match="['space'] not found"): da.drop_duplicates("space", keep=keep) - def test_drop_duplicates_2d(self): + def test_drop_duplicates_2d(self) -> None: da = xr.DataArray( [[0, 5, 6, 7], [2, 1, 3, 4]], dims=["space", "time"], @@ -6744,7 +6767,7 @@ def test_drop_duplicates_2d(self): class TestNumpyCoercion: # TODO once flexible indexes refactor complete also test coercion of dimension coords - def test_from_numpy(self): + def test_from_numpy(self) -> None: da = xr.DataArray([1, 2, 3], dims="x", coords={"lat": ("x", [4, 5, 6])}) assert_identical(da.as_numpy(), da) @@ -6752,7 +6775,7 @@ def test_from_numpy(self): np.testing.assert_equal(da["lat"].to_numpy(), np.array([4, 5, 6])) @requires_dask - def test_from_dask(self): + def test_from_dask(self) -> None: da = xr.DataArray([1, 2, 3], dims="x", coords={"lat": ("x", [4, 5, 6])}) da_chunked = da.chunk(1) @@ -6761,7 +6784,7 @@ def test_from_dask(self): np.testing.assert_equal(da["lat"].to_numpy(), np.array([4, 5, 6])) @requires_pint - def test_from_pint(self): + def test_from_pint(self) -> None: from pint import Quantity arr = np.array([1, 2, 3]) @@ -6777,7 +6800,7 @@ def test_from_pint(self): np.testing.assert_equal(da["lat"].to_numpy(), arr + 3) @requires_sparse - def test_from_sparse(self): + def test_from_sparse(self) -> None: import sparse arr = np.diagflat([1, 2, 3]) @@ -6793,7 +6816,7 @@ def test_from_sparse(self): np.testing.assert_equal(da.to_numpy(), arr) @requires_cupy - def test_from_cupy(self): + def test_from_cupy(self) -> None: import cupy as cp arr = 
np.array([1, 2, 3]) @@ -6807,7 +6830,7 @@ def test_from_cupy(self): @requires_dask @requires_pint - def test_from_pint_wrapping_dask(self): + def test_from_pint_wrapping_dask(self) -> None: import dask from pint import Quantity @@ -6828,13 +6851,13 @@ def test_from_pint_wrapping_dask(self): class TestStackEllipsis: # https://github.com/pydata/xarray/issues/6051 - def test_result_as_expected(self): + def test_result_as_expected(self) -> None: da = DataArray([[1, 2], [1, 2]], dims=("x", "y")) result = da.stack(flat=[...]) expected = da.stack(flat=da.dims) assert_identical(result, expected) - def test_error_on_ellipsis_without_list(self): + def test_error_on_ellipsis_without_list(self) -> None: da = DataArray([[1, 2], [1, 2]], dims=("x", "y")) with pytest.raises(ValueError): - da.stack(flat=...) + da.stack(flat=...) # type: ignore diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py index 2a6de0be550..d62254dd327 100644 --- a/xarray/tests/test_interp.py +++ b/xarray/tests/test_interp.py @@ -1,10 +1,12 @@ from itertools import combinations, permutations +from typing import cast import numpy as np import pandas as pd import pytest import xarray as xr +from xarray.core.types import InterpOptions from xarray.tests import ( assert_allclose, assert_equal, @@ -24,22 +26,25 @@ pass -def get_example_data(case): - x = np.linspace(0, 1, 100) - y = np.linspace(0, 0.1, 30) - data = xr.DataArray( - np.sin(x[:, np.newaxis]) * np.cos(y), - dims=["x", "y"], - coords={"x": x, "y": y, "x2": ("x", x**2)}, - ) +def get_example_data(case: int) -> xr.DataArray: if case == 0: - return data + # 2D + x = np.linspace(0, 1, 100) + y = np.linspace(0, 0.1, 30) + return xr.DataArray( + np.sin(x[:, np.newaxis]) * np.cos(y), + dims=["x", "y"], + coords={"x": x, "y": y, "x2": ("x", x**2)}, + ) elif case == 1: - return data.chunk({"y": 3}) + # 2D chunked single dim + return get_example_data(0).chunk({"y": 3}) elif case == 2: - return data.chunk({"x": 25, "y": 3}) + # 2D chunked both dims + return get_example_data(0).chunk({"x": 25, "y": 3}) elif case == 3: + # 3D x = np.linspace(0, 1, 100) y = np.linspace(0, 0.1, 30) z = np.linspace(0.1, 0.2, 10) @@ -49,7 +54,10 @@ def get_example_data(case): coords={"x": x, "y": y, "x2": ("x", x**2), "z": z}, ) elif case == 4: + # 3D chunked single dim return get_example_data(3).chunk({"z": 5}) + else: + raise ValueError("case must be 1-4") def test_keywargs(): @@ -62,8 +70,10 @@ def test_keywargs(): @pytest.mark.parametrize("method", ["linear", "cubic"]) @pytest.mark.parametrize("dim", ["x", "y"]) -@pytest.mark.parametrize("case", [0, 1]) -def test_interpolate_1d(method, dim, case): +@pytest.mark.parametrize( + "case", [pytest.param(0, id="no_chunk"), pytest.param(1, id="chunk_y")] +) +def test_interpolate_1d(method: InterpOptions, dim: str, case: int) -> None: if not has_scipy: pytest.skip("scipy is not installed.") @@ -72,7 +82,7 @@ def test_interpolate_1d(method, dim, case): da = get_example_data(case) xdest = np.linspace(0.0, 0.9, 80) - actual = da.interp(method=method, **{dim: xdest}) + actual = da.interp(method=method, coords={dim: xdest}) # scipy interpolation for the reference def func(obj, new_x): @@ -95,7 +105,7 @@ def func(obj, new_x): @pytest.mark.parametrize("method", ["cubic", "zero"]) -def test_interpolate_1d_methods(method): +def test_interpolate_1d_methods(method: InterpOptions) -> None: if not has_scipy: pytest.skip("scipy is not installed.") @@ -103,7 +113,7 @@ def test_interpolate_1d_methods(method): dim = "x" xdest = np.linspace(0.0, 0.9, 80) - actual = 
da.interp(method=method, **{dim: xdest}) + actual = da.interp(method=method, coords={dim: xdest}) # scipy interpolation for the reference def func(obj, new_x): @@ -122,7 +132,7 @@ def func(obj, new_x): @pytest.mark.parametrize("use_dask", [False, True]) -def test_interpolate_vectorize(use_dask): +def test_interpolate_vectorize(use_dask: bool) -> None: if not has_scipy: pytest.skip("scipy is not installed.") @@ -197,8 +207,10 @@ def func(obj, dim, new_x): assert_allclose(actual, expected.transpose("z", "w", "y", transpose_coords=True)) -@pytest.mark.parametrize("case", [3, 4]) -def test_interpolate_nd(case): +@pytest.mark.parametrize( + "case", [pytest.param(3, id="no_chunk"), pytest.param(4, id="chunked")] +) +def test_interpolate_nd(case: int) -> None: if not has_scipy: pytest.skip("scipy is not installed.") @@ -208,13 +220,13 @@ def test_interpolate_nd(case): da = get_example_data(case) # grid -> grid - xdest = np.linspace(0.1, 1.0, 11) - ydest = np.linspace(0.0, 0.2, 10) - actual = da.interp(x=xdest, y=ydest, method="linear") + xdestnp = np.linspace(0.1, 1.0, 11) + ydestnp = np.linspace(0.0, 0.2, 10) + actual = da.interp(x=xdestnp, y=ydestnp, method="linear") # linear interpolation is separateable - expected = da.interp(x=xdest, method="linear") - expected = expected.interp(y=ydest, method="linear") + expected = da.interp(x=xdestnp, method="linear") + expected = expected.interp(y=ydestnp, method="linear") assert_allclose(actual.transpose("x", "y", "z"), expected.transpose("x", "y", "z")) # grid -> 1d-sample @@ -248,7 +260,7 @@ def test_interpolate_nd(case): @requires_scipy -def test_interpolate_nd_nd(): +def test_interpolate_nd_nd() -> None: """Interpolate nd array with an nd indexer sharing coordinates.""" # Create original array a = [0, 2] @@ -278,7 +290,7 @@ def test_interpolate_nd_nd(): @requires_scipy -def test_interpolate_nd_with_nan(): +def test_interpolate_nd_with_nan() -> None: """Interpolate an array with an nd indexer and `NaN` values.""" # Create indexer into `a` with dimensions (y, x) @@ -298,14 +310,16 @@ def test_interpolate_nd_with_nan(): db = 2 * da ds = xr.Dataset({"da": da, "db": db}) - out = ds.interp(a=ia) + out2 = ds.interp(a=ia) expected_ds = xr.Dataset({"da": expected, "db": 2 * expected}) - xr.testing.assert_allclose(out.drop_vars("a"), expected_ds) + xr.testing.assert_allclose(out2.drop_vars("a"), expected_ds) @pytest.mark.parametrize("method", ["linear"]) -@pytest.mark.parametrize("case", [0, 1]) -def test_interpolate_scalar(method, case): +@pytest.mark.parametrize( + "case", [pytest.param(0, id="no_chunk"), pytest.param(1, id="chunk_y")] +) +def test_interpolate_scalar(method: InterpOptions, case: int) -> None: if not has_scipy: pytest.skip("scipy is not installed.") @@ -333,8 +347,10 @@ def func(obj, new_x): @pytest.mark.parametrize("method", ["linear"]) -@pytest.mark.parametrize("case", [3, 4]) -def test_interpolate_nd_scalar(method, case): +@pytest.mark.parametrize( + "case", [pytest.param(3, id="no_chunk"), pytest.param(4, id="chunked")] +) +def test_interpolate_nd_scalar(method: InterpOptions, case: int) -> None: if not has_scipy: pytest.skip("scipy is not installed.") @@ -361,7 +377,7 @@ def test_interpolate_nd_scalar(method, case): @pytest.mark.parametrize("use_dask", [True, False]) -def test_nans(use_dask): +def test_nans(use_dask: bool) -> None: if not has_scipy: pytest.skip("scipy is not installed.") @@ -377,7 +393,7 @@ def test_nans(use_dask): @pytest.mark.parametrize("use_dask", [True, False]) -def test_errors(use_dask): +def test_errors(use_dask: 
bool) -> None: if not has_scipy: pytest.skip("scipy is not installed.") @@ -389,7 +405,7 @@ def test_errors(use_dask): for method in ["akima", "spline"]: with pytest.raises(ValueError): - da.interp(x=[0.5, 1.5], method=method) + da.interp(x=[0.5, 1.5], method=method) # type: ignore # not sorted if use_dask: @@ -404,9 +420,9 @@ def test_errors(use_dask): # invalid method with pytest.raises(ValueError): - da.interp(x=[2, 0], method="boo") + da.interp(x=[2, 0], method="boo") # type: ignore with pytest.raises(ValueError): - da.interp(y=[2, 0], method="boo") + da.interp(y=[2, 0], method="boo") # type: ignore # object-type DataArray cannot be interpolated da = xr.DataArray(["a", "b", "c"], dims="x", coords={"x": [0, 1, 2]}) @@ -415,7 +431,7 @@ def test_errors(use_dask): @requires_scipy -def test_dtype(): +def test_dtype() -> None: data_vars = dict( a=("time", np.array([1, 1.25, 2])), b=("time", np.array([True, True, False], dtype=bool)), @@ -432,7 +448,7 @@ def test_dtype(): @requires_scipy -def test_sorted(): +def test_sorted() -> None: # unsorted non-uniform gridded data x = np.random.randn(100) y = np.random.randn(30) @@ -459,7 +475,7 @@ def test_sorted(): @requires_scipy -def test_dimension_wo_coords(): +def test_dimension_wo_coords() -> None: da = xr.DataArray( np.arange(12).reshape(3, 4), dims=["x", "y"], coords={"y": [0, 1, 2, 3]} ) @@ -474,7 +490,7 @@ def test_dimension_wo_coords(): @requires_scipy -def test_dataset(): +def test_dataset() -> None: ds = create_test_data() ds.attrs["foo"] = "var" ds["var1"].attrs["buz"] = "var2" @@ -497,8 +513,8 @@ def test_dataset(): assert interpolated["var1"].attrs["buz"] == "var2" -@pytest.mark.parametrize("case", [0, 3]) -def test_interpolate_dimorder(case): +@pytest.mark.parametrize("case", [pytest.param(0, id="2D"), pytest.param(3, id="3D")]) +def test_interpolate_dimorder(case: int) -> None: """Make sure the resultant dimension order is consistent with .sel()""" if not has_scipy: pytest.skip("scipy is not installed.") @@ -546,7 +562,7 @@ def test_interpolate_dimorder(case): @requires_scipy -def test_interp_like(): +def test_interp_like() -> None: ds = create_test_data() ds.attrs["foo"] = "var" ds["var1"].attrs["buz"] = "var2" @@ -588,7 +604,7 @@ def test_interp_like(): pytest.param("2000-01-01T12:00", 0.5, marks=pytest.mark.xfail), ], ) -def test_datetime(x_new, expected): +def test_datetime(x_new, expected) -> None: da = xr.DataArray( np.arange(24), dims="time", @@ -606,7 +622,7 @@ def test_datetime(x_new, expected): @requires_scipy -def test_datetime_single_string(): +def test_datetime_single_string() -> None: da = xr.DataArray( np.arange(24), dims="time", @@ -620,7 +636,7 @@ def test_datetime_single_string(): @requires_cftime @requires_scipy -def test_cftime(): +def test_cftime() -> None: times = xr.cftime_range("2000", periods=24, freq="D") da = xr.DataArray(np.arange(24), coords=[times], dims="time") @@ -633,7 +649,7 @@ def test_cftime(): @requires_cftime @requires_scipy -def test_cftime_type_error(): +def test_cftime_type_error() -> None: times = xr.cftime_range("2000", periods=24, freq="D") da = xr.DataArray(np.arange(24), coords=[times], dims="time") @@ -646,7 +662,7 @@ def test_cftime_type_error(): @requires_cftime @requires_scipy -def test_cftime_list_of_strings(): +def test_cftime_list_of_strings() -> None: from cftime import DatetimeProlepticGregorian times = xr.cftime_range( @@ -667,7 +683,7 @@ def test_cftime_list_of_strings(): @requires_cftime @requires_scipy -def test_cftime_single_string(): +def test_cftime_single_string() -> None: 
from cftime import DatetimeProlepticGregorian times = xr.cftime_range( @@ -687,7 +703,7 @@ def test_cftime_single_string(): @requires_scipy -def test_datetime_to_non_datetime_error(): +def test_datetime_to_non_datetime_error() -> None: da = xr.DataArray( np.arange(24), dims="time", @@ -699,7 +715,7 @@ def test_datetime_to_non_datetime_error(): @requires_cftime @requires_scipy -def test_cftime_to_non_cftime_error(): +def test_cftime_to_non_cftime_error() -> None: times = xr.cftime_range("2000", periods=24, freq="D") da = xr.DataArray(np.arange(24), coords=[times], dims="time") @@ -708,7 +724,7 @@ def test_cftime_to_non_cftime_error(): @requires_scipy -def test_datetime_interp_noerror(): +def test_datetime_interp_noerror() -> None: # GH:2667 a = xr.DataArray( np.arange(21).reshape(3, 7), @@ -728,7 +744,7 @@ def test_datetime_interp_noerror(): @requires_cftime @requires_scipy -def test_3641(): +def test_3641() -> None: times = xr.cftime_range("0001", periods=3, freq="500Y") da = xr.DataArray(range(3), dims=["time"], coords=[times]) da.interp(time=["0002-05-01"]) @@ -736,7 +752,7 @@ def test_3641(): @requires_scipy @pytest.mark.parametrize("method", ["nearest", "linear"]) -def test_decompose(method): +def test_decompose(method: InterpOptions) -> None: da = xr.DataArray( np.arange(6).reshape(3, 2), dims=["x", "y"], @@ -769,7 +785,9 @@ def test_decompose(method): for nscalar in range(0, interp_ndim + 1) ], ) -def test_interpolate_chunk_1d(method, data_ndim, interp_ndim, nscalar, chunked): +def test_interpolate_chunk_1d( + method: InterpOptions, data_ndim, interp_ndim, nscalar, chunked: bool +) -> None: """Interpolate nd array with multiple independent indexers It should do a series of 1d interpolation @@ -812,12 +830,15 @@ def test_interpolate_chunk_1d(method, data_ndim, interp_ndim, nscalar, chunked): before = 2 * da.coords[dim][0] - da.coords[dim][1] after = 2 * da.coords[dim][-1] - da.coords[dim][-2] - dest[dim] = np.linspace(before, after, len(da.coords[dim]) * 13) + dest[dim] = cast( + xr.DataArray, + np.linspace(before, after, len(da.coords[dim]) * 13), + ) if chunked: dest[dim] = xr.DataArray(data=dest[dim], dims=[dim]) dest[dim] = dest[dim].chunk(2) - actual = da.interp(method=method, **dest, kwargs=kwargs) - expected = da.compute().interp(method=method, **dest, kwargs=kwargs) + actual = da.interp(method=method, **dest, kwargs=kwargs) # type: ignore + expected = da.compute().interp(method=method, **dest, kwargs=kwargs) # type: ignore assert_identical(actual, expected) @@ -831,7 +852,7 @@ def test_interpolate_chunk_1d(method, data_ndim, interp_ndim, nscalar, chunked): @requires_dask @pytest.mark.parametrize("method", ["linear", "nearest"]) @pytest.mark.filterwarnings("ignore:Increasing number of chunks") -def test_interpolate_chunk_advanced(method): +def test_interpolate_chunk_advanced(method: InterpOptions) -> None: """Interpolate nd array with an nd indexer sharing coordinates.""" # Create original array x = np.linspace(-1, 1, 5) @@ -857,25 +878,25 @@ def test_interpolate_chunk_advanced(method): coords=[("w", w), ("theta", theta)], ) - x = r * np.cos(theta) - y = r * np.sin(theta) - z = xr.DataArray( + xda = r * np.cos(theta) + yda = r * np.sin(theta) + zda = xr.DataArray( data=w[:, np.newaxis] * np.sin(theta), coords=[("w", w), ("theta", theta)], ) kwargs = {"fill_value": None} - expected = da.interp(t=0.5, x=x, y=y, z=z, kwargs=kwargs, method=method) + expected = da.interp(t=0.5, x=xda, y=yda, z=zda, kwargs=kwargs, method=method) da = da.chunk(2) - x = x.chunk(1) - z = z.chunk(3) - 
actual = da.interp(t=0.5, x=x, y=y, z=z, kwargs=kwargs, method=method) + xda = xda.chunk(1) + zda = zda.chunk(3) + actual = da.interp(t=0.5, x=xda, y=yda, z=zda, kwargs=kwargs, method=method) assert_identical(actual, expected) @requires_scipy -def test_interp1d_bounds_error(): +def test_interp1d_bounds_error() -> None: """Ensure exception on bounds error is raised if requested""" da = xr.DataArray( np.sin(0.3 * np.arange(4)), @@ -898,7 +919,7 @@ def test_interp1d_bounds_error(): (("x", np.array([0, 0.5, 1, 2]), dict(unit="s")), False), ], ) -def test_coord_attrs(x, expect_same_attrs): +def test_coord_attrs(x, expect_same_attrs: bool) -> None: base_attrs = dict(foo="bar") ds = xr.Dataset( data_vars=dict(a=2 * np.arange(5)), @@ -910,7 +931,7 @@ def test_coord_attrs(x, expect_same_attrs): @requires_scipy -def test_interp1d_complex_out_of_bounds(): +def test_interp1d_complex_out_of_bounds() -> None: """Ensure complex nans are used by default""" da = xr.DataArray( np.exp(0.3j * np.arange(4)), From 0479b2d50ace39b02cf58a7131d91b64ad484e90 Mon Sep 17 00:00:00 2001 From: Mick <43316012+headtr1ck@users.noreply.github.com> Date: Sat, 28 May 2022 12:29:15 +0200 Subject: [PATCH 2/6] Typing support for custom backends (#6651) * add str to allowed engine types * Update xarray/backends/api.py * Update xarray/backends/api.py * Update xarray/backends/api.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/backends/api.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 5dd486e952e..1426cd320d9 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -56,6 +56,8 @@ T_NetcdfEngine, Literal["pydap", "pynio", "pseudonetcdf", "cfgrib", "zarr"], Type[BackendEntrypoint], + str, # no nice typing support for custom backends + None, ] T_Chunks = Union[int, dict[Any, Any], Literal["auto"], None] T_NetcdfTypes = Literal[ @@ -392,7 +394,8 @@ def open_dataset( scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ - "pseudonetcdf", "zarr"} or subclass of xarray.backends.BackendEntrypoint, optional + "pseudonetcdf", "zarr", None}, installed backend \ + or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for "netcdf4". A custom backend class (a subclass of ``BackendEntrypoint``) @@ -579,7 +582,8 @@ def open_dataarray( scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ - "pseudonetcdf", "zarr"}, optional + "pseudonetcdf", "zarr", None}, installed backend \ + or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for "netcdf4". @@ -804,8 +808,9 @@ def open_mfdataset( If provided, call this function on each dataset prior to concatenation. You can find the file-name from which each dataset was loaded in ``ds.encoding["source"]``. 
- engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", "zarr"}, \ - optional + engine : {"netcdf4", "scipy", "pydap", "h5netcdf", "pynio", "cfgrib", \ + "pseudonetcdf", "zarr", None}, installed backend \ + or subclass of xarray.backends.BackendEntrypoint, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for "netcdf4". From 3e099e49cc571f26e8caa44e69d6473475887879 Mon Sep 17 00:00:00 2001 From: Illviljan <14371165+Illviljan@users.noreply.github.com> Date: Sun, 29 May 2022 03:12:07 +0200 Subject: [PATCH 3/6] Allow all interp methods in typing (#6647) * Allow all interp options * renaming the typve variable * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * use the Literal lists in the interpolator checks * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update missing.py * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more fixes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * more fixes * Get the args of Literals that's inside of the Union * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Update missing.py Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- xarray/core/dataarray.py | 3 +-- xarray/core/dataset.py | 5 +++-- xarray/core/missing.py | 35 ++++++++++++++--------------------- xarray/core/types.py | 8 ++++---- 4 files changed, 22 insertions(+), 29 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b5bcc255b70..0341d84b661 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -87,7 +87,6 @@ DatetimeUnitOptions, ErrorOptions, ErrorOptionsWithWarn, - InterpAllOptions, InterpOptions, PadModeOptions, PadReflectOptions, @@ -2626,7 +2625,7 @@ def fillna(self: T_DataArray, value: Any) -> T_DataArray: def interpolate_na( self: T_DataArray, dim: Hashable | None = None, - method: InterpAllOptions = "linear", + method: InterpOptions = "linear", limit: int | None = None, use_coordinate: bool | str = True, max_gap: ( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c5c727f4bed..084437e102c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -109,6 +109,7 @@ CompatOptions, ErrorOptions, ErrorOptionsWithWarn, + InterpOptions, JoinOptions, PadModeOptions, PadReflectOptions, @@ -3037,7 +3038,7 @@ def _reindex( def interp( self, coords: Mapping[Any, Any] = None, - method: str = "linear", + method: InterpOptions = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, method_non_numeric: str = "nearest", @@ -3298,7 +3299,7 @@ def _validate_interp_indexer(x, new_x): def interp_like( self, other: Dataset | DataArray, - method: str = "linear", + method: InterpOptions = "linear", assume_sorted: bool = False, kwargs: Mapping[str, Any] = None, method_non_numeric: str = "nearest", diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 2e869dbe675..5e954c8ce27 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -4,7 +4,7 @@ import warnings from functools import partial from numbers import Number -from typing import TYPE_CHECKING, Any, Callable, Hashable, Sequence +from typing import TYPE_CHECKING, Any, Callable, Hashable, Sequence, get_args import numpy as np 
import pandas as pd @@ -16,6 +16,7 @@ from .duck_array_ops import datetime_to_numeric, push, timedelta_to_numeric from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_version, is_duck_dask_array +from .types import Interp1dOptions, InterpOptions from .utils import OrderedSet, is_scalar from .variable import Variable, broadcast_variables @@ -309,7 +310,7 @@ def interp_na( self, dim: Hashable = None, use_coordinate: bool | str = True, - method: str = "linear", + method: InterpOptions = "linear", limit: int = None, max_gap: int | float | str | pd.Timedelta | np.timedelta64 | dt.timedelta = None, keep_attrs: bool = None, @@ -469,28 +470,20 @@ def _import_interpolant(interpolant, method): raise ImportError(f"Interpolation with method {method} requires scipy.") from e -def _get_interpolator(method, vectorizeable_only=False, **kwargs): +def _get_interpolator( + method: InterpOptions, vectorizeable_only: bool = False, **kwargs +): """helper function to select the appropriate interpolator class returns interpolator class and keyword arguments for the class """ - interp1d_methods = [ - "linear", - "nearest", - "zero", - "slinear", - "quadratic", - "cubic", - "polynomial", - ] - valid_methods = interp1d_methods + [ - "barycentric", - "krog", - "pchip", - "spline", - "akima", + interp_class: type[NumpyInterpolator] | type[ScipyInterpolator] | type[ + SplineInterpolator ] + interp1d_methods = get_args(Interp1dOptions) + valid_methods = tuple(vv for v in get_args(InterpOptions) for vv in get_args(v)) + # prioritize scipy.interpolate if ( method == "linear" @@ -597,7 +590,7 @@ def _floatize_x(x, new_x): return x, new_x -def interp(var, indexes_coords, method, **kwargs): +def interp(var, indexes_coords, method: InterpOptions, **kwargs): """Make an interpolation of Variable Parameters @@ -650,7 +643,7 @@ def interp(var, indexes_coords, method, **kwargs): result = Variable(new_dims, interped, attrs=var.attrs) # dimension of the output array - out_dims = OrderedSet() + out_dims: OrderedSet = OrderedSet() for d in var.dims: if d in dims: out_dims.update(indexes_coords[d][1].dims) @@ -660,7 +653,7 @@ def interp(var, indexes_coords, method, **kwargs): return result -def interp_func(var, x, new_x, method, kwargs): +def interp_func(var, x, new_x, method: InterpOptions, kwargs): """ multi-dimensional interpolation for array-like. Interpolated axes should be located in the last position. 
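The `_get_interpolator` rewrite above is the crux of this patch: instead of maintaining hand-written lists of method names, it derives them at runtime from the `Literal` aliases via `typing.get_args`. A minimal standalone sketch of that technique follows (the aliases are copied from the `types.py` hunk just below; everything else is illustrative only, not xarray code):

```python
from typing import Literal, Union, get_args

# The aliases as defined in xarray/core/types.py after this patch.
Interp1dOptions = Literal[
    "linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial"
]
InterpolantOptions = Literal["barycentric", "krog", "pchip", "spline", "akima"]
InterpOptions = Union[Interp1dOptions, InterpolantOptions]

# get_args on a Union of Literals returns the Literal members, not the
# strings, so a second get_args pass is needed to flatten them. That is
# why _get_interpolator uses a nested generator expression.
interp1d_methods = get_args(Interp1dOptions)
valid_methods = tuple(vv for v in get_args(InterpOptions) for vv in get_args(v))

assert "polynomial" in interp1d_methods
assert "akima" in valid_methods and "akima" not in interp1d_methods
```

The payoff is a single source of truth: adding a method to one of the `Literal` aliases now updates the static checks and the runtime validation together.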
diff --git a/xarray/core/types.py b/xarray/core/types.py index 38d13c07c3c..f4f86bafc93 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -49,11 +49,11 @@ ] JoinOptions = Literal["outer", "inner", "left", "right", "exact", "override"] -InterpOptions = Literal["linear", "nearest", "zero", "slinear", "quadratic", "cubic"] -Interp1dOptions = Union[InterpOptions, Literal["polynomial"]] -InterpAllOptions = Union[ - Interp1dOptions, Literal["barycentric", "krog", "pchip", "spline", "akima"] +Interp1dOptions = Literal[ + "linear", "nearest", "zero", "slinear", "quadratic", "cubic", "polynomial" ] +InterpolantOptions = Literal["barycentric", "krog", "pchip", "spline", "akima"] +InterpOptions = Union[Interp1dOptions, InterpolantOptions] DatetimeUnitOptions = Literal[ "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "ns", "ps", "fs", "as" From ed5d2b5fa5bfed1cfc558666f485402adabede3d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sun, 29 May 2022 07:10:52 -0600 Subject: [PATCH 4/6] [test-upstream] import `cleanup` fixture from `distributed` (#6650) Co-authored-by: keewis --- xarray/tests/test_distributed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_distributed.py b/xarray/tests/test_distributed.py index b683ba73f75..cde24c101ea 100644 --- a/xarray/tests/test_distributed.py +++ b/xarray/tests/test_distributed.py @@ -12,7 +12,7 @@ from dask.distributed import Client, Lock from distributed.client import futures_of -from distributed.utils_test import cluster, gen_cluster, loop +from distributed.utils_test import cluster, gen_cluster, loop, cleanup # noqa: F401 import xarray as xr from xarray.backends.locks import HDF5_LOCK, CombinedLock From 9fc5a85e50fd8933cbbcd5a2c565f1adaa2eed58 Mon Sep 17 00:00:00 2001 From: ngam <67342040+ngam@users.noreply.github.com> Date: Sun, 29 May 2022 22:00:37 -0400 Subject: [PATCH 5/6] 0-padded month. (#6653) --- HOW_TO_RELEASE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 8d82277ae55..f647263a3a7 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -111,4 +111,4 @@ upstream https://github.com/pydata/xarray (push) As of 2022.03.0, we utilize the [CALVER](https://calver.org/) version system. Specifically, we have adopted the pattern `YYYY.MM.X`, where `YYYY` is a 4-digit -year (e.g. `2022`), `MM` is a 2-digit zero-padded month (e.g. `01` for January), and `X` is the release number (starting at zero at the start of each month and incremented once for each additional release). +year (e.g. `2022`), `0M` is a 2-digit zero-padded month (e.g. `01` for January), and `X` is the release number (starting at zero at the start of each month and incremented once for each additional release). 
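Before the final patch, one note on the `0M` wording above. A hypothetical helper (`calver_tag` is invented for illustration and is not part of any release tooling) shows why the zero-padded month spelling matters:

```python
def calver_tag(year: int, month: int, release_number: int) -> str:
    """Format a CalVer tag in the YYYY.0M.X scheme described above."""
    return f"{year:04d}.{month:02d}.{release_number}"

# Without the zero padding, January would render as "2022.1.0".
assert calver_tag(2022, 1, 0) == "2022.01.0"
assert calver_tag(2022, 3, 0) == "2022.03.0"  # the release that adopted CalVer
```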
From 46150748c9005918f66456e07c2813d209b950e3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 May 2022 18:04:21 +0000 Subject: [PATCH 6/6] [pre-commit.ci] pre-commit autoupdate (#6654) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v0.950 → v0.960](https://github.com/pre-commit/mirrors-mypy/compare/v0.950...v0.960) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6d6c94ff88f..1ff5e5e32c8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,7 +46,7 @@ repos: # - id: velin # args: ["--write", "--compact"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.950 + rev: v0.960 hooks: - id: mypy # Copied from setup.cfg
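Taken together, the typing work in this series is mostly visible to users through mypy (bumped to v0.960 in the final patch above). A hedged sketch of the effect, reusing the invalid method string that the updated `test_errors` exercises, and assuming scipy is installed since `interp` requires it:

```python
import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(4.0), dims="x", coords={"x": [0.0, 1.0, 2.0, 3.0]})

# Accepted: "nearest" is one of the Interp1dOptions literals.
ok = da.interp(x=[0.5, 1.5], method="nearest")

# Now flagged statically by mypy as an incompatible Literal, in addition to
# the ValueError raised at runtime, which is why the updated tests mark such
# deliberate misuse with `# type: ignore`:
# bad = da.interp(x=[0.5, 1.5], method="boo")
```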