From face5db1a21f6de81d6c9d4e3a639455a5ba6806 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 2 Apr 2021 10:42:35 +0200 Subject: [PATCH 01/20] add IndexAdapter class + move PandasIndexAdapter --- xarray/core/indexes.py | 169 ++++++++++++++++++++++++++++++++-- xarray/core/indexing.py | 103 +-------------------- xarray/core/variable.py | 9 +- xarray/tests/test_backends.py | 4 +- xarray/tests/test_variable.py | 2 +- 5 files changed, 169 insertions(+), 118 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 0c4a28db93d..194e1136780 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1,12 +1,165 @@ import collections.abc -from typing import Any, Dict, Hashable, Iterable, Mapping, Optional, Tuple, Union +from contextlib import suppress +from datetime import timedelta +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Hashable, + Iterable, + Mapping, + Optional, + Tuple, + Union, +) import numpy as np import pandas as pd -from . import formatting +from . 
import formatting, utils +from .indexing import ExplicitlyIndexedNDArrayMixin, NumpyIndexingAdapter +from .npcompat import DTypeLike from .utils import is_scalar -from .variable import Variable + +if TYPE_CHECKING: + from .variable import Variable + + +class IndexAdapter: + """Base class inherited by all xarray-compatible indexes.""" + + __slots__ = "coord_names" + + def __init__(self, coord_names: Union[Hashable, Iterable[Hashable]]): + if isinstance(coord_names, Iterable) and not isinstance(coord_names, str): + self.coord_names = tuple(coord_names) + else: + self.coord_names = tuple([coord_names]) + + @classmethod + def from_variables( + cls, variables: Dict[Hashable, "Variable"], **kwargs + ): # pragma: no cover + raise NotImplementedError() + + +class PandasIndexAdapter(IndexAdapter, ExplicitlyIndexedNDArrayMixin): + """Wrap a pandas.Index to preserve dtypes and handle explicit indexing.""" + + __slots__ = ("array", "_dtype") + + def __init__( + self, array: Any, dtype: DTypeLike = None, coord_name: Optional[Hashable] = None + ): + self.array = utils.safe_cast_to_index(array) + + if dtype is None: + if isinstance(array, pd.PeriodIndex): + dtype_ = np.dtype("O") + elif hasattr(array, "categories"): + # category isn't a real numpy dtype + dtype_ = array.categories.dtype + elif not utils.is_valid_numpy_dtype(array.dtype): + dtype_ = np.dtype("O") + else: + dtype_ = array.dtype + else: + dtype_ = np.dtype(dtype) + self._dtype = dtype_ + + if coord_name is None: + coord_name = tuple() + super().__init__(coord_name) + + @classmethod + def from_variables(cls, variables: Dict[Hashable, "Variable"], **kwargs): + if len(variables) > 1: + raise ValueError("Cannot set a pandas.Index from more than one variable") + + varname, var = list(variables.items())[0] + return cls(var.data, dtype=var.dtype, coord_name=varname) + + @property + def dtype(self) -> np.dtype: + return self._dtype + + def __array__(self, dtype: DTypeLike = None) -> np.ndarray: + if dtype is None: + dtype = 
self.dtype + array = self.array + if isinstance(array, pd.PeriodIndex): + with suppress(AttributeError): + # this might not be public API + array = array.astype("object") + return np.asarray(array.values, dtype=dtype) + + @property + def shape(self) -> Tuple[int]: + return (len(self.array),) + + def __getitem__( + self, indexer + ) -> Union[ + "PandasIndexAdapter", + NumpyIndexingAdapter, + np.ndarray, + np.datetime64, + np.timedelta64, + ]: + key = indexer.tuple + if isinstance(key, tuple) and len(key) == 1: + # unpack key so it can index a pandas.Index object (pandas.Index + # objects don't like tuples) + (key,) = key + + if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional + return NumpyIndexingAdapter(self.array.values)[indexer] + + result = self.array[key] + + if isinstance(result, pd.Index): + result = PandasIndexAdapter(result, dtype=self.dtype) + else: + # result is a scalar + if result is pd.NaT: + # work around the impossibility of casting NaT with asarray + # note: it probably would be better in general to return + # pd.Timestamp rather np.than datetime64 but this is easier + # (for now) + result = np.datetime64("NaT", "ns") + elif isinstance(result, timedelta): + result = np.timedelta64(getattr(result, "value", result), "ns") + elif isinstance(result, pd.Timestamp): + # Work around for GH: pydata/xarray#1932 and numpy/numpy#10668 + # numpy fails to convert pd.Timestamp to np.datetime64[ns] + result = np.asarray(result.to_datetime64()) + elif self.dtype != object: + result = np.asarray(result, dtype=self.dtype) + + # as for numpy.ndarray indexing, we always want the result to be + # a NumPy array. 
+ result = utils.to_0d_array(result) + + return result + + def transpose(self, order) -> pd.Index: + return self.array # self.array should be always one-dimensional + + def __repr__(self) -> str: + return "{}(array={!r}, dtype={!r})".format( + type(self).__name__, self.array, self.dtype + ) + + def copy(self, deep: bool = True) -> "PandasIndexAdapter": + # Not the same as just writing `self.array.copy(deep=deep)`, as + # shallow copies of the underlying numpy.ndarrays become deep ones + # upon pickling + # >>> len(pickle.dumps((self.array, self.array))) + # 4000281 + # >>> len(pickle.dumps((self.array, self.array.copy(deep=False)))) + # 8000341 + array = self.array.copy(deep=True) if deep else self.array + return PandasIndexAdapter(array, self._dtype) def remove_unused_levels_categories(index: pd.Index) -> pd.Index: @@ -68,7 +221,7 @@ def __repr__(self): def default_indexes( - coords: Mapping[Any, Variable], dims: Iterable + coords: Mapping[Any, "Variable"], dims: Iterable ) -> Dict[Hashable, pd.Index]: """Default indexes for a Dataset/DataArray. 
@@ -89,11 +242,13 @@ def default_indexes( def isel_variable_and_index( name: Hashable, - variable: Variable, + variable: "Variable", index: pd.Index, - indexers: Mapping[Hashable, Union[int, slice, np.ndarray, Variable]], -) -> Tuple[Variable, Optional[pd.Index]]: + indexers: Mapping[Hashable, Union[int, slice, np.ndarray, "Variable"]], +) -> Tuple["Variable", Optional[pd.Index]]: """Index a Variable and pandas.Index together.""" + from .variable import Variable + if not indexers: # nothing to index return variable.copy(deep=False), index diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 82810908bea..99d52e04ebc 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -2,15 +2,12 @@ import functools import operator from collections import defaultdict -from contextlib import suppress -from datetime import timedelta from typing import Any, Callable, Iterable, List, Sequence, Tuple, Union import numpy as np import pandas as pd from . import duck_array_ops, nputils, utils -from .npcompat import DTypeLike from .pycompat import ( dask_array_type, integer_types, @@ -714,6 +711,8 @@ def as_indexable(array): if isinstance(array, np.ndarray): return NumpyIndexingAdapter(array) if isinstance(array, pd.Index): + from .indexes import PandasIndexAdapter + return PandasIndexAdapter(array) if isinstance(array, dask_array_type): return DaskIndexingAdapter(array) @@ -1386,101 +1385,3 @@ def __setitem__(self, key, value): def transpose(self, order): return self.array.transpose(order) - - -class PandasIndexAdapter(ExplicitlyIndexedNDArrayMixin): - """Wrap a pandas.Index to preserve dtypes and handle explicit indexing.""" - - __slots__ = ("array", "_dtype") - - def __init__(self, array: Any, dtype: DTypeLike = None): - self.array = utils.safe_cast_to_index(array) - if dtype is None: - if isinstance(array, pd.PeriodIndex): - dtype_ = np.dtype("O") - elif hasattr(array, "categories"): - # category isn't a real numpy dtype - dtype_ = array.categories.dtype 
- elif not utils.is_valid_numpy_dtype(array.dtype): - dtype_ = np.dtype("O") - else: - dtype_ = array.dtype - else: - dtype_ = np.dtype(dtype) - self._dtype = dtype_ - - @property - def dtype(self) -> np.dtype: - return self._dtype - - def __array__(self, dtype: DTypeLike = None) -> np.ndarray: - if dtype is None: - dtype = self.dtype - array = self.array - if isinstance(array, pd.PeriodIndex): - with suppress(AttributeError): - # this might not be public API - array = array.astype("object") - return np.asarray(array.values, dtype=dtype) - - @property - def shape(self) -> Tuple[int]: - return (len(self.array),) - - def __getitem__( - self, indexer - ) -> Union[NumpyIndexingAdapter, np.ndarray, np.datetime64, np.timedelta64]: - key = indexer.tuple - if isinstance(key, tuple) and len(key) == 1: - # unpack key so it can index a pandas.Index object (pandas.Index - # objects don't like tuples) - (key,) = key - - if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional - return NumpyIndexingAdapter(self.array.values)[indexer] - - result = self.array[key] - - if isinstance(result, pd.Index): - result = PandasIndexAdapter(result, dtype=self.dtype) - else: - # result is a scalar - if result is pd.NaT: - # work around the impossibility of casting NaT with asarray - # note: it probably would be better in general to return - # pd.Timestamp rather np.than datetime64 but this is easier - # (for now) - result = np.datetime64("NaT", "ns") - elif isinstance(result, timedelta): - result = np.timedelta64(getattr(result, "value", result), "ns") - elif isinstance(result, pd.Timestamp): - # Work around for GH: pydata/xarray#1932 and numpy/numpy#10668 - # numpy fails to convert pd.Timestamp to np.datetime64[ns] - result = np.asarray(result.to_datetime64()) - elif self.dtype != object: - result = np.asarray(result, dtype=self.dtype) - - # as for numpy.ndarray indexing, we always want the result to be - # a NumPy array. 
- result = utils.to_0d_array(result) - - return result - - def transpose(self, order) -> pd.Index: - return self.array # self.array should be always one-dimensional - - def __repr__(self) -> str: - return "{}(array={!r}, dtype={!r})".format( - type(self).__name__, self.array, self.dtype - ) - - def copy(self, deep: bool = True) -> "PandasIndexAdapter": - # Not the same as just writing `self.array.copy(deep=deep)`, as - # shallow copies of the underlying numpy.ndarrays become deep ones - # upon pickling - # >>> len(pickle.dumps((self.array, self.array))) - # 4000281 - # >>> len(pickle.dumps((self.array, self.array.copy(deep=False)))) - # 8000341 - array = self.array.copy(deep=True) if deep else self.array - return PandasIndexAdapter(array, self._dtype) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2e97fad91b2..019bd15b90c 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -25,13 +25,8 @@ import xarray as xr # only for Dataset and DataArray from . 
import arithmetic, common, dtypes, duck_array_ops, indexing, nputils, ops, utils -from .indexing import ( - BasicIndexer, - OuterIndexer, - PandasIndexAdapter, - VectorizedIndexer, - as_indexable, -) +from .indexes import PandasIndexAdapter +from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable from .options import _get_keep_attrs from .pycompat import ( cupy_array_type, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f6c00a2a9a9..d3e305ce4a0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -35,7 +35,7 @@ from xarray.backends.pydap_ import PydapDataStore from xarray.coding.variables import SerializationWarning from xarray.conventions import encode_dataset_coordinates -from xarray.core import indexing +from xarray.core import indexes, indexing from xarray.core.options import set_options from xarray.core.pycompat import dask_array_type from xarray.tests import LooseVersion, mock @@ -736,7 +736,7 @@ def find_and_validate_array(obj): elif isinstance(obj.array, dask_array_type): assert isinstance(obj, indexing.DaskIndexingAdapter) elif isinstance(obj.array, pd.Index): - assert isinstance(obj, indexing.PandasIndexAdapter) + assert isinstance(obj, indexes.PandasIndexAdapter) else: raise TypeError( "{} is wrapped by {}".format(type(obj.array), type(obj)) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c951e7d3118..a80b3b88883 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -11,6 +11,7 @@ from xarray import Coordinate, DataArray, Dataset, IndexVariable, Variable, set_options from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.common import full_like, ones_like, zeros_like +from xarray.core.indexes import PandasIndexAdapter from xarray.core.indexing import ( BasicIndexer, CopyOnWriteArray, @@ -19,7 +20,6 @@ MemoryCachedArray, NumpyIndexingAdapter, OuterIndexer, - PandasIndexAdapter, 
VectorizedIndexer, ) from xarray.core.pycompat import dask_array_type From ce3e185b19c5d167a1edb3bf5b75ea13ad693fd1 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 2 Apr 2021 17:52:20 +0200 Subject: [PATCH 02/20] wip: xarray_obj.indexes -> IndexAdapter objects --- xarray/core/alignment.py | 26 ++++++----- xarray/core/combine.py | 10 ++++- xarray/core/common.py | 5 ++- xarray/core/coordinates.py | 6 +-- xarray/core/dataarray.py | 15 +++++-- xarray/core/dataset.py | 45 ++++++++++--------- xarray/core/indexes.py | 54 ++++++++++++++++++----- xarray/core/indexing.py | 8 +++- xarray/core/merge.py | 25 ++++++----- xarray/core/variable.py | 5 +++ xarray/testing.py | 5 +-- xarray/tests/test_cftimeindex.py | 2 +- xarray/tests/test_cftimeindex_resample.py | 6 ++- xarray/tests/test_concat.py | 2 +- xarray/tests/test_dataarray.py | 4 +- 15 files changed, 145 insertions(+), 73 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 98cbadcb25c..0d0a21f90e4 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -17,7 +17,10 @@ import numpy as np import pandas as pd -from . import dtypes, utils +from xarray.core.indexes import PandasIndexAdapter + +from . 
import dtypes +from .indexes import IndexAdapter from .indexing import get_indexer_nd from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str from .variable import IndexVariable, Variable @@ -30,11 +33,11 @@ DataAlignable = TypeVar("DataAlignable", bound=DataWithCoords) -def _get_joiner(join): +def _get_joiner(join, index_cls): if join == "outer": - return functools.partial(functools.reduce, pd.Index.union) + return functools.partial(functools.reduce, index_cls.union) elif join == "inner": - return functools.partial(functools.reduce, pd.Index.intersection) + return functools.partial(functools.reduce, index_cls.intersection) elif join == "left": return operator.itemgetter(0) elif join == "right": @@ -298,16 +301,15 @@ def align( # - It ensures it's possible to do operations that don't require alignment # on indexes with duplicate values (which cannot be reindexed with # pandas). This is useful, e.g., for overwriting such duplicate indexes. - joiner = _get_joiner(join) joined_indexes = {} for dim, matching_indexes in all_indexes.items(): if dim in indexes: - index = utils.safe_cast_to_index(indexes[dim]) + index = PandasIndexAdapter(indexes[dim]) if ( any(not index.equals(other) for other in matching_indexes) or dim in unlabeled_dim_sizes ): - joined_indexes[dim] = indexes[dim] + joined_indexes[dim] = index else: if ( any( @@ -318,6 +320,7 @@ def align( ): if join == "exact": raise ValueError(f"indexes along dimension {dim!r} are not equal") + joiner = _get_joiner(join, type(matching_indexes[0])) index = joiner(matching_indexes) # make sure str coords are not cast to object index = maybe_coerce_to_str(index, all_coords[dim]) @@ -487,14 +490,14 @@ def reindex_like_indexers( def reindex_variables( variables: Mapping[Any, Variable], sizes: Mapping[Any, int], - indexes: Mapping[Any, pd.Index], + indexes: Mapping[Any, IndexAdapter], indexers: Mapping, method: Optional[str] = None, tolerance: Any = None, copy: bool = True, fill_value: Optional[Any] = dtypes.NA, 
sparse: bool = False, -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]: +) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, IndexAdapter]]: """Conform a dictionary of aligned variables onto a new set of variables, filling in missing values with NaN. @@ -559,10 +562,11 @@ def reindex_variables( "from that to be indexed along {:s}".format(str(indexer.dims), dim) ) - target = new_indexes[dim] = utils.safe_cast_to_index(indexers[dim]) + target = new_indexes[dim] = PandasIndexAdapter(np.asarray(indexers[dim])) if dim in indexes: - index = indexes[dim] + # TODO (benbovy - flexible indexes): support other indexes than pd.Index? + index = indexes[dim].array if not index.is_unique: raise ValueError( diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 573247937b7..2dade92e5fe 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -64,6 +64,10 @@ def _infer_concat_order_from_coords(datasets): "inferring concatenation order" ) + # TODO (benbovy, flexible indexes): all indexes should be Pandas.Index + # get pd.Index objects from IndexAdapter objects + indexes = [index.array for index in indexes] + # If dimension coordinate values are same on every dataset then # should be leaving this dimension alone (it's just a "bystander") if not all(index.equals(indexes[0]) for index in indexes[1:]): @@ -786,9 +790,13 @@ def combine_by_coords( ) # Check the overall coordinates are monotonically increasing + # TODO (benbovy - flexible indexes): only with pandas.Index? 
for dim in concat_dims: indexes = concatenated.indexes.get(dim) - if not (indexes.is_monotonic_increasing or indexes.is_monotonic_decreasing): + if not ( + indexes.array.is_monotonic_increasing + or indexes.array.is_monotonic_decreasing + ): raise ValueError( "Resulting object does not have monotonic" " global indexes along dimension {}".format(dim) diff --git a/xarray/core/common.py b/xarray/core/common.py index 321bd632811..787fafafa37 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -386,7 +386,7 @@ def get_index(self, key: Hashable) -> pd.Index: raise KeyError(key) try: - return self.indexes[key] + return self.indexes[key].array except KeyError: return pd.Index(range(self.sizes[key]), name=key) @@ -1140,7 +1140,8 @@ def resample( category=FutureWarning, ) - if isinstance(self.indexes[dim_name], CFTimeIndex): + # TODO (benbovy - flexible indexes): update when CFTimeIndex is an IndexAdpater subclass + if isinstance(self.indexes[dim_name].array, CFTimeIndex): from .resample_cftime import CFTimeGrouper grouper = CFTimeGrouper(freq, closed, label, base, loffset) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index f9445bed619..b4b68d25e21 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -17,7 +17,7 @@ import pandas as pd from . 
import formatting, indexing -from .indexes import Indexes +from .indexes import IndexAdapter, Indexes from .merge import merge_coordinates_without_align, merge_coords from .utils import Frozen, ReprObject, either_dict_or_kwargs from .variable import Variable @@ -262,7 +262,7 @@ def to_dataset(self) -> "Dataset": return self._data._copy_listed(names) def _update_coords( - self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, pd.Index] + self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, IndexAdapter] ) -> None: from .dataset import calculate_dimensions @@ -325,7 +325,7 @@ def __getitem__(self, key: Hashable) -> "DataArray": return self._data._getitem_coord(key) def _update_coords( - self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, pd.Index] + self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, IndexAdapter] ) -> None: from .dataset import calculate_dimensions diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1f82b48d7f8..9f014a79a25 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -51,7 +51,13 @@ ) from .dataset import Dataset, split_indexes from .formatting import format_item -from .indexes import Indexes, default_indexes, propagate_indexes +from .indexes import ( + IndexAdapter, + Indexes, + PandasIndexAdapter, + default_indexes, + propagate_indexes, +) from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords from .options import OPTIONS, _get_keep_attrs @@ -345,7 +351,7 @@ class DataArray(AbstractArray, DataWithCoords): _cache: Dict[str, Any] _coords: Dict[Any, Variable] _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, pd.Index]] + _indexes: Optional[Dict[Hashable, IndexAdapter]] _name: Optional[Hashable] _variable: Variable @@ -990,7 +996,10 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": if self._indexes is None: indexes = self._indexes else: - indexes = 
{k: v.copy(deep=deep) for k, v in self._indexes.items()} + indexes = { + k: PandasIndexAdapter(v.array.copy(deep=deep)) + for k, v in self._indexes.items() + } return self._replace(variable, coords, indexes=indexes) def __copy__(self) -> "DataArray": diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e959135e8d9..75bd3403075 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -64,7 +64,9 @@ ) from .duck_array_ops import datetime_to_numeric from .indexes import ( + IndexAdapter, Indexes, + PandasIndexAdapter, default_indexes, isel_variable_and_index, propagate_indexes, @@ -638,7 +640,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): _dims: Dict[Hashable, int] _encoding: Optional[Dict[Hashable, Any]] _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, pd.Index]] + _indexes: Optional[Dict[Hashable, IndexAdapter]] _variables: Dict[Hashable, Variable] __slots__ = ( @@ -1033,7 +1035,7 @@ def _replace( coord_names: Set[Hashable] = None, dims: Dict[Any, int] = None, attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Any, pd.Index], None, Default] = _default, + indexes: Union[Dict[Any, IndexAdapter], None, Default] = _default, encoding: Union[dict, None, Default] = _default, inplace: bool = False, ) -> "Dataset": @@ -1082,7 +1084,7 @@ def _replace_with_new_dims( variables: Dict[Hashable, Variable], coord_names: set = None, attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Hashable, pd.Index], None, Default] = _default, + indexes: Union[Dict[Hashable, IndexAdapter], None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Replace variables with recalculated dimensions.""" @@ -1110,7 +1112,7 @@ def _replace_vars_and_dims( variables, coord_names, dims, attrs, indexes=None, inplace=inplace ) - def _overwrite_indexes(self, indexes: Mapping[Any, pd.Index]) -> "Dataset": + def _overwrite_indexes(self, indexes: Mapping[Any, 
IndexAdapter]) -> "Dataset": if not indexes: return self @@ -1124,8 +1126,9 @@ def _overwrite_indexes(self, indexes: Mapping[Any, pd.Index]) -> "Dataset": # switch from dimension to level names, if necessary dim_names: Dict[Hashable, str] = {} for dim, idx in indexes.items(): - if not isinstance(idx, pd.MultiIndex) and idx.name != dim: - dim_names[dim] = idx.name + pd_idx = idx.array + if not isinstance(pd_idx, pd.MultiIndex) and pd_idx.name != dim: + dim_names[dim] = pd_idx.name if dim_names: obj = obj.rename(dim_names) return obj @@ -1261,7 +1264,8 @@ def _level_coords(self) -> Dict[str, Hashable]: coordinate name. """ level_coords: Dict[str, Hashable] = {} - for name, index in self.indexes.items(): + for name, index_adapter in self.indexes.items(): + index = index_adapter if isinstance(index, pd.MultiIndex): level_names = index.names (dim,) = self.variables[name].dims @@ -2111,7 +2115,7 @@ def isel( continue if indexes and var_name in indexes: if var_value.ndim == 1: - indexes[var_name] = var_value.to_index() + indexes[var_name] = PandasIndexAdapter(var_value.data) else: del indexes[var_name] variables[var_name] = var_value @@ -2139,7 +2143,7 @@ def _isel_fancy( indexers_list = list(self._validate_indexers(indexers, missing_dims)) variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, pd.Index] = {} + indexes: Dict[Hashable, IndexAdapter] = {} for name, var in self.variables.items(): var_indexers = {k: v for k, v in indexers_list if k in var.dims} @@ -3009,15 +3013,16 @@ def _rename_indexes(self, name_dict, dims_set): return None indexes = {} for k, v in self.indexes.items(): + index = v.array new_name = name_dict.get(k, k) if new_name not in dims_set: continue - if isinstance(v, pd.MultiIndex): - new_names = [name_dict.get(k, k) for k in v.names] - index = v.rename(names=new_names) + if isinstance(index, pd.MultiIndex): + new_names = [name_dict.get(k, k) for k in index.names] + new_index = index.rename(names=new_names) else: - index = 
v.rename(new_name) - indexes[new_name] = index + new_index = index.rename(new_name) + indexes[new_name] = PandasIndexAdapter(new_index) return indexes def _rename_all(self, name_dict, dims_dict): @@ -3234,7 +3239,7 @@ def swap_dims( coord_names.update({dim for dim in dims_dict.values() if dim in self.variables}) variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, pd.Index] = {} + indexes: Dict[Hashable, IndexAdapter] = {} for k, v in self.variables.items(): dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) if k in result_dims: @@ -3246,7 +3251,7 @@ def swap_dims( if new_index.nlevels == 1: # make sure index name matches dimension name new_index = new_index.rename(k) - indexes[k] = new_index + indexes[k] = PandasIndexAdapter(new_index) else: var = v.to_base_variable() var.dims = dims @@ -3517,7 +3522,7 @@ def reorder_levels( raise ValueError(f"coordinate {dim} has no MultiIndex") new_index = index.reorder_levels(order) variables[dim] = IndexVariable(coord.dims, new_index) - indexes[dim] = new_index + indexes[dim] = PandasIndexAdapter(new_index) return self._replace(variables, indexes=indexes) @@ -3545,7 +3550,7 @@ def _stack_once(self, dims, new_dim): coord_names = set(self._coord_names) - set(dims) | {new_dim} indexes = {k: v for k, v in self.indexes.items() if k not in dims} - indexes[new_dim] = idx + indexes[new_dim] = PandasIndexAdapter(idx) return self._replace_with_new_dims( variables, coord_names=coord_names, indexes=indexes @@ -3732,7 +3737,7 @@ def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": for name, lev in zip(index.names, index.levels): variables[name] = IndexVariable(name, lev) - indexes[name] = lev + indexes[name] = PandasIndexAdapter(lev) coord_names = set(self._coord_names) - {dim} | set(index.names) @@ -3771,7 +3776,7 @@ def _unstack_full_reindex( for name, lev in zip(new_dim_names, index.levels): variables[name] = IndexVariable(name, lev) - indexes[name] = lev + indexes[name] = PandasIndexAdapter(lev) 
coord_names = set(self._coord_names) - {dim} | set(new_dim_names) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 194e1136780..3870d433142 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -36,12 +36,26 @@ def __init__(self, coord_names: Union[Hashable, Iterable[Hashable]]): else: self.coord_names = tuple([coord_names]) + # TODO (benbovy - flexible indexes): remove + # temporarly avoid mypy errors: the `array` attribute is used in many places + # to access the underlying pandas.Index objects from xarray_obj.indexes + self.array = pd.Index([]) + @classmethod def from_variables( cls, variables: Dict[Hashable, "Variable"], **kwargs ): # pragma: no cover raise NotImplementedError() + def equals(self, other): # pragma: no cover + raise NotImplementedError() + + def union(self, other): # pragma: no cover + raise NotImplementedError() + + def intersection(self, other): # pragma: no cover + raise NotImplementedError() + class PandasIndexAdapter(IndexAdapter, ExplicitlyIndexedNDArrayMixin): """Wrap a pandas.Index to preserve dtypes and handle explicit indexing.""" @@ -51,6 +65,10 @@ class PandasIndexAdapter(IndexAdapter, ExplicitlyIndexedNDArrayMixin): def __init__( self, array: Any, dtype: DTypeLike = None, coord_name: Optional[Hashable] = None ): + if coord_name is None: + coord_name = tuple() + super().__init__(coord_name) + self.array = utils.safe_cast_to_index(array) if dtype is None: @@ -67,10 +85,6 @@ def __init__( dtype_ = np.dtype(dtype) self._dtype = dtype_ - if coord_name is None: - coord_name = tuple() - super().__init__(coord_name) - @classmethod def from_variables(cls, variables: Dict[Hashable, "Variable"], **kwargs): if len(variables) > 1: @@ -97,6 +111,24 @@ def __array__(self, dtype: DTypeLike = None) -> np.ndarray: def shape(self) -> Tuple[int]: return (len(self.array),) + def equals(self, other): + if isinstance(other, pd.Index): + return self.array.equals(other) + else: + return self.array.equals(other.array) + + def 
union(self, other): + if isinstance(other, pd.Index): + return self.array.union(other) + else: + return self.array.union(other.array) + + def intersection(self, other): + if isinstance(other, pd.Index): + return self.array.intersection(other) + else: + return self.array.intersection(other.array) + def __getitem__( self, indexer ) -> Union[ @@ -222,7 +254,7 @@ def __repr__(self): def default_indexes( coords: Mapping[Any, "Variable"], dims: Iterable -) -> Dict[Hashable, pd.Index]: +) -> Dict[Hashable, IndexAdapter]: """Default indexes for a Dataset/DataArray. Parameters @@ -237,15 +269,15 @@ def default_indexes( Mapping from indexing keys (levels/dimension names) to indexes used for indexing along that dimension. """ - return {key: coords[key].to_index() for key in dims if key in coords} + return {key: coords[key]._to_index_adpater() for key in dims if key in coords} def isel_variable_and_index( name: Hashable, variable: "Variable", - index: pd.Index, + index: IndexAdapter, indexers: Mapping[Hashable, Union[int, slice, np.ndarray, "Variable"]], -) -> Tuple["Variable", Optional[pd.Index]]: +) -> Tuple["Variable", Optional[IndexAdapter]]: """Index a Variable and pandas.Index together.""" from .variable import Variable @@ -269,7 +301,7 @@ def isel_variable_and_index( indexer = indexers[dim] if isinstance(indexer, Variable): indexer = indexer.data - new_index = index[indexer] + new_index = PandasIndexAdapter(index.array[indexer]) return new_variable, new_index @@ -283,8 +315,8 @@ def roll_index(index: pd.Index, count: int, axis: int = 0) -> pd.Index: def propagate_indexes( - indexes: Optional[Dict[Hashable, pd.Index]], exclude: Optional[Any] = None -) -> Optional[Dict[Hashable, pd.Index]]: + indexes: Optional[Dict[Hashable, IndexAdapter]], exclude: Optional[Any] = None +) -> Optional[Dict[Hashable, IndexAdapter]]: """Creates new indexes dict from existing dict optionally excluding some dimensions.""" if exclude is None: exclude = () diff --git a/xarray/core/indexing.py 
b/xarray/core/indexing.py index 99d52e04ebc..f16c7be6eeb 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -108,6 +108,8 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No dimension. If `index` is a pandas.MultiIndex and depending on `label`, return a new pandas.Index or pandas.MultiIndex (otherwise return None). """ + from .indexes import PandasIndexAdapter + new_index = None if isinstance(label, slice): @@ -197,6 +199,10 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No indexer = get_indexer_nd(index, label, method, tolerance) if np.any(indexer < 0): raise KeyError(f"not all values found in index {index_name!r}") + + if new_index is not None: + new_index = PandasIndexAdapter(new_index) + return indexer, new_index @@ -251,7 +257,7 @@ def remap_label_indexers(data_obj, indexers, method=None, tolerance=None): dim_indexers = get_dim_indexers(data_obj, indexers) for dim, label in dim_indexers.items(): try: - index = data_obj.indexes[dim] + index = data_obj.indexes[dim].array except KeyError: # no index for this dimension: reuse the provided labels if method is not None or tolerance is not None: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index ec95563bda9..867014c125c 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,6 +20,7 @@ from . 
import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv +from .indexes import IndexAdapter from .utils import Frozen, compat_dict_union, dict_equiv, equivalent from .variable import Variable, as_variable, assert_unique_multiindex_level_names @@ -157,14 +158,14 @@ def _assert_compat_valid(compat): ) -MergeElement = Tuple[Variable, Optional[pd.Index]] +MergeElement = Tuple[Variable, Optional[IndexAdapter]] def merge_collected( grouped: Dict[Hashable, List[MergeElement]], prioritized: Mapping[Hashable, MergeElement] = None, compat: str = "minimal", -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]: +) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, IndexAdapter]]: """Merge dicts of variables, while resolving conflicts appropriately. Parameters @@ -186,7 +187,7 @@ def merge_collected( _assert_compat_valid(compat) merged_vars: Dict[Hashable, Variable] = {} - merged_indexes: Dict[Hashable, pd.Index] = {} + merged_indexes: Dict[Hashable, IndexAdapter] = {} for name, elements_list in grouped.items(): if name in prioritized: @@ -251,7 +252,7 @@ def collect_variables_and_indexes( from .dataarray import DataArray from .dataset import Dataset - grouped: Dict[Hashable, List[Tuple[Variable, pd.Index]]] = {} + grouped: Dict[Hashable, List[Tuple[Variable, Optional[IndexAdapter]]]] = {} def append(name, variable, index): values = grouped.setdefault(name, []) @@ -278,7 +279,7 @@ def append_all(variables, indexes): variable = as_variable(variable, name=name) if variable.dims == (name,): variable = variable.to_index_variable() - index = variable.to_index() + index = variable._to_index_adpater() else: index = None append(name, variable, index) @@ -290,7 +291,7 @@ def collect_from_coordinates( list_of_coords: "List[Coordinates]", ) -> Dict[Hashable, List[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" - grouped: Dict[Hashable, List[Tuple[Variable, pd.Index]]] = {} + grouped: 
Dict[Hashable, List[Tuple[Variable, Optional[IndexAdapter]]]] = {} for coords in list_of_coords: variables = coords.variables @@ -305,7 +306,7 @@ def merge_coordinates_without_align( objects: "List[Coordinates]", prioritized: Mapping[Hashable, MergeElement] = None, exclude_dims: AbstractSet = frozenset(), -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]: +) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, IndexAdapter]]: """Merge variables/indexes from coordinates without automatic alignments. This function is used for merging coordinate from pre-existing xarray @@ -438,9 +439,9 @@ def merge_coords( compat: str = "minimal", join: str = "outer", priority_arg: Optional[int] = None, - indexes: Optional[Mapping[Hashable, pd.Index]] = None, + indexes: Optional[Mapping[Hashable, IndexAdapter]] = None, fill_value: object = dtypes.NA, -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, pd.Index]]: +) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, IndexAdapter]]: """Merge coordinate variables. See merge_core below for argument descriptions. This works similarly to @@ -474,7 +475,7 @@ def _extract_indexes_from_coords(coords): for name, variable in coords.items(): variable = as_variable(variable, name=name) if variable.dims == (name,): - yield name, variable.to_index() + yield name, variable._to_index_adpater() def assert_valid_explicit_coords(variables, dims, explicit_coords): @@ -559,7 +560,7 @@ def merge_core( combine_attrs: Optional[str] = "override", priority_arg: Optional[int] = None, explicit_coords: Optional[Sequence] = None, - indexes: Optional[Mapping[Hashable, pd.Index]] = None, + indexes: Optional[Mapping[Hashable, IndexAdapter]] = None, fill_value: object = dtypes.NA, ) -> _MergeResult: """Core logic for merging labeled objects. 
@@ -962,6 +963,6 @@ def dataset_update_method( return merge_core( [dataset, other], priority_arg=1, - indexes=indexes, + indexes=indexes, # type: ignore combine_attrs="override", ) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 019bd15b90c..c8e91b3b802 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -551,6 +551,11 @@ def to_index_variable(self): to_coord = utils.alias(to_index_variable, "to_coord") + def _to_index_adpater(self): + # temporary function used internally as a replacement of to_index() + # returns an IndexAdpater instance instead of a pd.Index instance + return PandasIndexAdapter(self.to_index()) + def to_index(self): """Convert this variable to a pandas.Index""" return self.to_index_variable().to_index() diff --git a/xarray/testing.py b/xarray/testing.py index 2129b1e1aa4..48ebd6bc32f 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -4,12 +4,11 @@ from typing import Hashable, Set, Union import numpy as np -import pandas as pd from xarray.core import duck_array_ops, formatting, utils from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -from xarray.core.indexes import default_indexes +from xarray.core.indexes import IndexAdapter, default_indexes from xarray.core.variable import IndexVariable, Variable __all__ = ( @@ -254,7 +253,7 @@ def assert_chunks_equal(a, b): def _assert_indexes_invariants_checks(indexes, possible_coord_variables, dims): assert isinstance(indexes, dict), indexes - assert all(isinstance(v, pd.Index) for v in indexes.values()), { + assert all(isinstance(v, IndexAdapter) for v in indexes.values()), { k: type(v) for k, v in indexes.items() } diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 71d6ffc8fff..c8a789a76aa 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -696,7 +696,7 @@ def test_concat_cftimeindex(date_type): ) da = xr.concat([da1, da2], dim="time") - assert 
isinstance(da.indexes["time"], CFTimeIndex) + assert isinstance(da.indexes["time"].array, CFTimeIndex) @requires_cftime diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index c4f32795b59..2e33ab21a04 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -99,7 +99,8 @@ def test_resample(freqs, closed, label, base): ) .mean() ) - da_cftime["time"] = da_cftime.indexes["time"].to_datetimeindex() + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a IndexAdpater subclass + da_cftime["time"] = da_cftime.indexes["time"].array.to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) @@ -145,5 +146,6 @@ def test_calendars(calendar): .resample(time=freq, closed=closed, label=label, base=base, loffset=loffset) .mean() ) - da_cftime["time"] = da_cftime.indexes["time"].to_datetimeindex() + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a IndexAdpater subclass + da_cftime["time"] = da_cftime.indexes["time"].array.to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index beed48a35fc..c2d182b1ca0 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -517,7 +517,7 @@ def test_concat(self): stacked = concat(grouped, ds["x"]) assert_identical(foo, stacked) # with an index as the 'dim' argument - stacked = concat(grouped, ds.indexes["x"]) + stacked = concat(grouped, pd.Index(ds["x"], name="x")) assert_identical(foo, stacked) actual = concat([foo[0], foo[1]], pd.Index([0, 1])).reset_coords(drop=True) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 20e357543d6..0891c53899e 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1630,7 +1630,7 @@ def test_swap_dims(self): assert_identical(expected, actual) for dim_name in set().union(expected.indexes.keys(), 
actual.indexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name], actual.indexes[dim_name] + expected.indexes[dim_name].array, actual.indexes[dim_name].array ) array = DataArray(np.random.randn(3), {"x": list("abc")}, "x") @@ -1660,7 +1660,7 @@ def test_swap_dims(self): assert_identical(expected, actual) for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name], actual.indexes[dim_name] + expected.indexes[dim_name].array, actual.indexes[dim_name].array ) def test_expand_dims_error(self): From 7dd1d0edfdad00b8e7c9c77f4f0839de1a5547ec Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 29 Apr 2021 16:05:44 +0200 Subject: [PATCH 03/20] fix more broken tests --- xarray/core/alignment.py | 12 ++++++---- xarray/core/dataarray.py | 8 +++++-- xarray/core/dataset.py | 30 +++++++++++++++-------- xarray/core/indexes.py | 22 ++++++++++------- xarray/core/merge.py | 7 ++++-- xarray/core/missing.py | 4 +++- xarray/core/parallel.py | 16 +++++++++---- xarray/tests/test_dataset.py | 46 ++++++++++++++++++++++-------------- 8 files changed, 95 insertions(+), 50 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 0d0a21f90e4..19a0a86136d 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -17,12 +17,10 @@ import numpy as np import pandas as pd -from xarray.core.indexes import PandasIndexAdapter - from . 
import dtypes -from .indexes import IndexAdapter +from .indexes import IndexAdapter, PandasIndexAdapter from .indexing import get_indexer_nd -from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str +from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str, safe_cast_to_index from .variable import IndexVariable, Variable if TYPE_CHECKING: @@ -304,7 +302,11 @@ def align( joined_indexes = {} for dim, matching_indexes in all_indexes.items(): if dim in indexes: - index = PandasIndexAdapter(indexes[dim]) + # TODO: benbovy - flexible indexes. maybe move this logic in util func + if isinstance(indexes[dim], IndexAdapter): + index = indexes[dim] + else: + index = PandasIndexAdapter(safe_cast_to_index(indexes[dim])) if ( any(not index.equals(other) for other in matching_indexes) or dim in unlabeled_dim_sizes diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 9f014a79a25..6366d7a1436 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -484,7 +484,9 @@ def _overwrite_indexes(self, indexes: Mapping[Hashable, Any]) -> "DataArray": # switch from dimension to level names, if necessary dim_names: Dict[Any, str] = {} for dim, idx in indexes.items(): - if not isinstance(idx, pd.MultiIndex) and idx.name != dim: + # TODO: benbovy - flexible indexes: update when MultiIndex has its own class + pd_idx = idx.array + if not isinstance(pd_idx, pd.MultiIndex) and pd_idx.name != dim: dim_names[dim] = idx.name if dim_names: obj = obj.rename(dim_names) @@ -2181,7 +2183,9 @@ def to_unstacked_dataset(self, dim, level=0): Dataset.to_stacked_array """ - idx = self.indexes[dim] + # TODO: benbovy - flexible indexes: update when MultIndex has its own + # class inheriting from xarray.Index + idx = self.indexes[dim].array if not isinstance(idx, pd.MultiIndex): raise ValueError(f"'{dim}' is not a stacked coordinate") diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 75bd3403075..5b739611b24 100644 --- a/xarray/core/dataset.py 
+++ b/xarray/core/dataset.py @@ -1266,8 +1266,9 @@ def _level_coords(self) -> Dict[str, Hashable]: level_coords: Dict[str, Hashable] = {} for name, index_adapter in self.indexes.items(): index = index_adapter - if isinstance(index, pd.MultiIndex): - level_names = index.names + # TODO: benbovy - flexible indexes: update when MultIndex has its own xarray class. + if isinstance(index.array, pd.MultiIndex): + level_names = index.array.names (dim,) = self.variables[name].dims level_coords.update({lname: dim for lname in level_names}) return level_coords @@ -1278,7 +1279,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": """ variables: Dict[Hashable, Variable] = {} coord_names = set() - indexes: Dict[Hashable, pd.Index] = {} + indexes: Dict[Hashable, IndexAdapter] = {} for name in names: try: @@ -1291,7 +1292,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": if ref_name in self._coord_names or ref_name in self.dims: coord_names.add(var_name) if (var_name,) == var.dims: - indexes[var_name] = var.to_index() + indexes[var_name] = var._to_index_adpater() needed_dims: Set[Hashable] = set() for v in variables.values(): @@ -1966,7 +1967,9 @@ def _validate_indexers( v = np.asarray(v) if v.dtype.kind in "US": - index = self.indexes[k] + # TODO: benbovy - flexible indexes + # update when CFTimeIndex has its own xarray index class + index = self.indexes[k].array if isinstance(index, pd.DatetimeIndex): v = v.astype("datetime64[ns]") elif isinstance(index, xr.CFTimeIndex): @@ -2115,7 +2118,7 @@ def isel( continue if indexes and var_name in indexes: if var_value.ndim == 1: - indexes[var_name] = PandasIndexAdapter(var_value.data) + indexes[var_name] = var_value._to_index_adpater() else: del indexes[var_name] variables[var_name] = var_value @@ -2916,7 +2919,7 @@ def _validate_interp_indexer(x, new_x): for k, v in indexers.items(): assert isinstance(v, Variable) if v.dims == (k,): - indexes[k] = v.to_index() + indexes[k] = v._to_index_adpater() # 
Extract coordinates from indexers coord_vars, new_indexes = selected._get_indexers_coords_and_indexes(coords) @@ -3517,7 +3520,9 @@ def reorder_levels( indexes = dict(self.indexes) for dim, order in dim_order.items(): coord = self._variables[dim] - index = self.indexes[dim] + # TODO: benbovy - flexible indexes: update when MultiIndex + # has its own class inherited from xarray.Index + index = self.indexes[dim].array if not isinstance(index, pd.MultiIndex): raise ValueError(f"coordinate {dim} has no MultiIndex") new_index = index.reorder_levels(order) @@ -3702,7 +3707,9 @@ def ensure_stackable(val): # coerce the levels of the MultiIndex to have the same type as the # input dimensions. This code is messy, so it might be better to just # input a dummy value for the singleton dimension. - idx = data_array.indexes[new_dim] + # TODO: benbovy - flexible indexes: update when MultIndex has its own + # class inheriting from xarray.Index + idx = data_array.indexes[new_dim].array levels = [idx.levels[0]] + [ level.astype(self[level.name].dtype) for level in idx.levels[1:] ] @@ -5529,7 +5536,10 @@ def diff(self, dim, n=1, label="upper"): indexes = dict(self.indexes) if dim in indexes: - indexes[dim] = indexes[dim][kwargs_new[dim]] + # TODO: benbovy - flexible indexes: check slicing of xarray indexes? + # or only allow this for pandas indexes? 
+ index = indexes[dim].array + indexes[dim] = PandasIndexAdapter(index[kwargs_new[dim]]) difference = self._replace_with_new_dims(variables, indexes=indexes) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 3870d433142..c0ed9c5ec8c 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -119,15 +119,17 @@ def equals(self, other): def union(self, other): if isinstance(other, pd.Index): - return self.array.union(other) + new_idx = self.array.union(other) else: - return self.array.union(other.array) + new_idx = self.array.union(other.array) + return type(self)(new_idx) def intersection(self, other): if isinstance(other, pd.Index): - return self.array.intersection(other) + new_idx = self.array.intersection(other) else: - return self.array.intersection(other.array) + new_idx = self.array.intersection(other.array) + return type(self)(new_idx) def __getitem__( self, indexer @@ -305,13 +307,17 @@ def isel_variable_and_index( return new_variable, new_index -def roll_index(index: pd.Index, count: int, axis: int = 0) -> pd.Index: +def roll_index( + index: PandasIndexAdapter, count: int, axis: int = 0 +) -> PandasIndexAdapter: """Roll an pandas.Index.""" - count %= index.shape[0] + pd_index = index.array + count %= pd_index.shape[0] if count != 0: - return index[-count:].append(index[:-count]) + new_idx = pd_index[-count:].append(pd_index[:-count]) else: - return index[:] + new_idx = pd_index[:] + return PandasIndexAdapter(new_idx) def propagate_indexes( diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 867014c125c..3546c4e84a1 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . 
import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .indexes import IndexAdapter +from .indexes import IndexAdapter, PandasIndexAdapter from .utils import Frozen, compat_dict_union, dict_equiv, equivalent from .variable import Variable, as_variable, assert_unique_multiindex_level_names @@ -959,7 +959,10 @@ def dataset_update_method( other[key] = value.drop_vars(coord_names) # use ds.coords and not ds.indexes, else str coords are cast to object - indexes = {key: dataset.coords[key] for key in dataset.indexes.keys()} + # TODO: benbovy - flexible indexes: fix this (it only works with pandas indexes) + indexes = { + key: PandasIndexAdapter(dataset.coords[key]) for key in dataset.indexes.keys() + } return merge_core( [dataset, other], priority_arg=1, diff --git a/xarray/core/missing.py b/xarray/core/missing.py index e6dd8b537a0..6b96f8aad5a 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -317,9 +317,11 @@ def interp_na( if not is_scalar(max_gap): raise ValueError("max_gap must be a scalar.") + # TODO: benbovy - flexible indexes: update when CFTimeIndex (and DatetimeIndex?) 
+ # has its own class inheriting from xarray.Index if ( dim in self.indexes - and isinstance(self.indexes[dim], (pd.DatetimeIndex, CFTimeIndex)) + and isinstance(self.indexes[dim].array, (pd.DatetimeIndex, CFTimeIndex)) and use_coordinate ): # Convert to float diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 2a2fcfc1849..3c74a83ecab 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -27,6 +27,8 @@ import numpy as np +from xarray.core.indexes import PandasIndexAdapter + from .alignment import align from .dataarray import DataArray from .dataset import Dataset @@ -499,10 +501,16 @@ def subset_dataset_to_block( } expected["data_vars"] = set(template.data_vars.keys()) # type: ignore expected["coords"] = set(template.coords.keys()) # type: ignore - expected["indexes"] = { - dim: indexes[dim][_get_chunk_slicer(dim, chunk_index, output_chunk_bounds)] - for dim in indexes - } + # TODO: benbovy - flexible indexes: clean this up + # for now assumes pandas index (thus can be indexed) but it won't be the case for + # all indexes + expected_indexes = {} + for dim in indexes: + idx = indexes[dim].array[ + _get_chunk_slicer(dim, chunk_index, output_chunk_bounds) + ] + expected_indexes[dim] = PandasIndexAdapter(idx) + expected["indexes"] = expected_indexes from_wrapper = (gname,) + chunk_tuple graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index fb15f15df1a..7cac39d36fb 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1391,13 +1391,13 @@ def test_sel_dataarray_mindex(self): ) actual_isel = mds.isel(x=xr.DataArray(np.arange(3), dims="x")) - actual_sel = mds.sel(x=DataArray(mds.indexes["x"][:3], dims="x")) + actual_sel = mds.sel(x=DataArray(midx[:3], dims="x")) assert actual_isel["x"].dims == ("x",) assert actual_sel["x"].dims == ("x",) assert_identical(actual_isel, actual_sel) actual_isel = 
mds.isel(x=xr.DataArray(np.arange(3), dims="z")) - actual_sel = mds.sel(x=Variable("z", mds.indexes["x"][:3])) + actual_sel = mds.sel(x=Variable("z", midx[:3])) assert actual_isel["x"].dims == ("z",) assert actual_sel["x"].dims == ("z",) assert_identical(actual_isel, actual_sel) @@ -1407,7 +1407,7 @@ def test_sel_dataarray_mindex(self): x=xr.DataArray(np.arange(3), dims="z", coords={"z": [0, 1, 2]}) ) actual_sel = mds.sel( - x=xr.DataArray(mds.indexes["x"][:3], dims="z", coords={"z": [0, 1, 2]}) + x=xr.DataArray(midx[:3], dims="z", coords={"z": [0, 1, 2]}) ) assert actual_isel["x"].dims == ("z",) assert actual_sel["x"].dims == ("z",) @@ -2688,20 +2688,22 @@ def test_rename_does_not_change_CFTimeIndex_type(self): renamed = orig.rename(time="time_new") assert "time_new" in renamed.indexes - assert isinstance(renamed.indexes["time_new"], CFTimeIndex) - assert renamed.indexes["time_new"].name == "time_new" + # TODO: benbovy - flexible indexes: update when CFTimeIndex + # inherits from xarray.Index + assert isinstance(renamed.indexes["time_new"].array, CFTimeIndex) + assert renamed.indexes["time_new"].array.name == "time_new" # check original has not changed assert "time" in orig.indexes - assert isinstance(orig.indexes["time"], CFTimeIndex) - assert orig.indexes["time"].name == "time" + assert isinstance(orig.indexes["time"].array, CFTimeIndex) + assert orig.indexes["time"].array.name == "time" # note: rename_dims(time="time_new") drops "ds.indexes" renamed = orig.rename_dims() - assert isinstance(renamed.indexes["time"], CFTimeIndex) + assert isinstance(renamed.indexes["time"].array, CFTimeIndex) renamed = orig.rename_vars() - assert isinstance(renamed.indexes["time"], CFTimeIndex) + assert isinstance(renamed.indexes["time"].array, CFTimeIndex) def test_rename_does_not_change_DatetimeIndex_type(self): # make sure DatetimeIndex is conderved on rename @@ -2711,20 +2713,22 @@ def test_rename_does_not_change_DatetimeIndex_type(self): renamed = orig.rename(time="time_new") 
assert "time_new" in renamed.indexes - assert isinstance(renamed.indexes["time_new"], DatetimeIndex) - assert renamed.indexes["time_new"].name == "time_new" + # TODO: benbovy - flexible indexes: update when DatetimeIndex + # inherits from xarray.Index? + assert isinstance(renamed.indexes["time_new"].array, DatetimeIndex) + assert renamed.indexes["time_new"].array.name == "time_new" # check original has not changed assert "time" in orig.indexes - assert isinstance(orig.indexes["time"], DatetimeIndex) - assert orig.indexes["time"].name == "time" + assert isinstance(orig.indexes["time"].array, DatetimeIndex) + assert orig.indexes["time"].array.name == "time" # note: rename_dims(time="time_new") drops "ds.indexes" renamed = orig.rename_dims() - assert isinstance(renamed.indexes["time"], DatetimeIndex) + assert isinstance(renamed.indexes["time"].array, DatetimeIndex) renamed = orig.rename_vars() - assert isinstance(renamed.indexes["time"], DatetimeIndex) + assert isinstance(renamed.indexes["time"].array, DatetimeIndex) def test_swap_dims(self): original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42}) @@ -2733,7 +2737,9 @@ def test_swap_dims(self): assert_identical(expected, actual) assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) - pd.testing.assert_index_equal(actual.indexes["y"], expected.indexes["y"]) + pd.testing.assert_index_equal( + actual.indexes["y"].array, expected.indexes["y"].array + ) roundtripped = actual.swap_dims({"y": "x"}) assert_identical(original.set_coords("y"), roundtripped) @@ -2764,7 +2770,9 @@ def test_swap_dims(self): assert_identical(expected, actual) assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) - pd.testing.assert_index_equal(actual.indexes["y"], expected.indexes["y"]) + pd.testing.assert_index_equal( + actual.indexes["y"].array, expected.indexes["y"].array + ) def test_expand_dims_error(self): original = 
Dataset( @@ -3141,7 +3149,9 @@ def test_to_stacked_array_dtype_dims(self): D = xr.Dataset({"a": a, "b": b}) sample_dims = ["x"] y = D.to_stacked_array("features", sample_dims) - assert y.indexes["features"].levels[1].dtype == D.y.dtype + # TODO: benbovy - flexible indexes: update when MultiIndex has its own class + # inherited from xarray.Index + assert y.indexes["features"].array.levels[1].dtype == D.y.dtype assert y.dims == ("x", "features") def test_to_stacked_array_to_unstacked_dataset(self): From 7b3e39c96205a2354b328e0bf1da3d1b9267a44e Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 29 Apr 2021 16:24:45 +0200 Subject: [PATCH 04/20] fix merge glitch --- xarray/core/variable.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 722cb525d2a..06bca4e999f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -27,13 +27,7 @@ from .arithmetic import VariableArithmetic from .common import AbstractArray from .indexes import PandasIndexAdapter -from .indexing import ( - BasicIndexer, - OuterIndexer, - PandasIndexAdapter, - VectorizedIndexer, - as_indexable, -) +from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable from .options import _get_keep_attrs from .pycompat import ( cupy_array_type, From c1ecd4992029b276b404c5b84f405f393b0616a6 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Thu, 29 Apr 2021 17:36:03 +0200 Subject: [PATCH 05/20] fix group bins tests --- xarray/core/alignment.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 19a0a86136d..f15e7c3a9c4 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -564,7 +564,9 @@ def reindex_variables( "from that to be indexed along {:s}".format(str(indexer.dims), dim) ) - target = new_indexes[dim] = PandasIndexAdapter(np.asarray(indexers[dim])) + target = new_indexes[dim] = PandasIndexAdapter( + 
safe_cast_to_index(indexers[dim]) + ) if dim in indexes: # TODO (benbovy - flexible indexes): support other indexes than pd.Index? From 51074d89d7b283d2e5b43d87c8c687d371fbc5eb Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 30 Apr 2021 13:36:06 +0200 Subject: [PATCH 06/20] add xindexes property Use it internally instead of indexes --- xarray/core/alignment.py | 6 +-- xarray/core/combine.py | 4 +- xarray/core/common.py | 4 +- xarray/core/coordinates.py | 16 ++++-- xarray/core/dataarray.py | 9 +++- xarray/core/dataset.py | 59 ++++++++++++---------- xarray/core/indexing.py | 2 +- xarray/core/merge.py | 8 +-- xarray/core/missing.py | 4 +- xarray/core/parallel.py | 14 +++--- xarray/tests/test_cftimeindex.py | 2 +- xarray/tests/test_cftimeindex_resample.py | 4 +- xarray/tests/test_conventions.py | 2 +- xarray/tests/test_dataarray.py | 43 +++++++++------- xarray/tests/test_dataset.py | 61 +++++++++++++---------- 15 files changed, 136 insertions(+), 102 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index f15e7c3a9c4..6f185cf78f8 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -64,7 +64,7 @@ def _override_indexes(objects, all_indexes, exclude): objects = list(objects) for idx, obj in enumerate(objects[1:]): new_indexes = {} - for dim in obj.indexes: + for dim in obj.xindexes: if dim not in exclude: new_indexes[dim] = all_indexes[dim][0] objects[idx + 1] = obj._overwrite_indexes(new_indexes) @@ -285,7 +285,7 @@ def align( if dim not in exclude: all_coords[dim].append(obj.coords[dim]) try: - index = obj.indexes[dim] + index = obj.xindexes[dim] except KeyError: unlabeled_dim_sizes[dim].add(obj.sizes[dim]) else: @@ -474,7 +474,7 @@ def reindex_like_indexers( ValueError If any dimensions without labels have different sizes. 
""" - indexers = {k: v for k, v in other.indexes.items() if k in target.dims} + indexers = {k: v for k, v in other.xindexes.items() if k in target.dims} for dim in other.dims: if dim not in indexers and dim in target.dims: diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 688e91df22c..cecf87f97cd 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -69,7 +69,7 @@ def _infer_concat_order_from_coords(datasets): if dim in ds0: # Need to read coordinate values to do ordering - indexes = [ds.indexes.get(dim) for ds in datasets] + indexes = [ds.xindexes.get(dim) for ds in datasets] if any(index is None for index in indexes): raise ValueError( "Every dimension needs a coordinate for " @@ -808,7 +808,7 @@ def combine_by_coords( # Check the overall coordinates are monotonically increasing # TODO (benbovy - flexible indexes): only with pandas.Index? for dim in concat_dims: - indexes = concatenated.indexes.get(dim) + indexes = concatenated.xindexes.get(dim) if not ( indexes.array.is_monotonic_increasing or indexes.array.is_monotonic_decreasing diff --git a/xarray/core/common.py b/xarray/core/common.py index 436a18d7f80..ea67635aea1 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -406,7 +406,7 @@ def get_index(self, key: Hashable) -> pd.Index: raise KeyError(key) try: - return self.indexes[key].array + return self.xindexes[key].array except KeyError: return pd.Index(range(self.sizes[key]), name=key) @@ -1161,7 +1161,7 @@ def resample( ) # TODO (benbovy - flexible indexes): update when CFTimeIndex is an IndexAdpater subclass - if isinstance(self.indexes[dim_name].array, CFTimeIndex): + if isinstance(self.xindexes[dim_name].array, CFTimeIndex): from .resample_cftime import CFTimeGrouper grouper = CFTimeGrouper(freq, closed, label, base, loffset) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index d913e870d17..ba7c0c1f4e7 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -52,6 
+52,10 @@ def dims(self) -> Union[Mapping[Hashable, int], Tuple[Hashable, ...]]: def indexes(self) -> Indexes: return self._data.indexes # type: ignore[attr-defined] + @property + def xindexes(self) -> Indexes: + return self._data.xindexes # type: ignore[attr-defined] + @property def variables(self): raise NotImplementedError() @@ -157,7 +161,7 @@ def to_index(self, ordered_dims: Sequence[Hashable] = None) -> pd.Index: def update(self, other: Mapping[Hashable, Any]) -> None: other_vars = getattr(other, "variables", other) coords, indexes = merge_coords( - [self.variables, other_vars], priority_arg=1, indexes=self.indexes + [self.variables, other_vars], priority_arg=1, indexes=self.xindexes ) self._update_coords(coords, indexes) @@ -165,7 +169,7 @@ def _merge_raw(self, other, reflexive): """For use with binary arithmetic.""" if other is None: variables = dict(self.variables) - indexes = dict(self.indexes) + indexes = dict(self.xindexes) else: coord_list = [self, other] if not reflexive else [other, self] variables, indexes = merge_coordinates_without_align(coord_list) @@ -180,7 +184,9 @@ def _merge_inplace(self, other): # don't include indexes in prioritized, because we didn't align # first and we want indexes to be checked prioritized = { - k: (v, None) for k, v in self.variables.items() if k not in self.indexes + k: (v, None) + for k, v in self.variables.items() + if k not in self.xindexes } variables, indexes = merge_coordinates_without_align( [self, other], prioritized @@ -285,7 +291,7 @@ def _update_coords( # TODO(shoyer): once ._indexes is always populated by a dict, modify # it to update inplace instead. - original_indexes = dict(self._data.indexes) + original_indexes = dict(self._data.xindexes) original_indexes.update(indexes) self._data._indexes = original_indexes @@ -343,7 +349,7 @@ def _update_coords( # TODO(shoyer): once ._indexes is always populated by a dict, modify # it to update inplace instead. 
- original_indexes = dict(self._data.indexes) + original_indexes = dict(self._data.xindexes) original_indexes.update(indexes) self._data._indexes = original_indexes diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 569056c6f51..9437537fbb3 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -781,6 +781,13 @@ def encoding(self, value: Mapping[Hashable, Any]) -> None: @property def indexes(self) -> Indexes: """Mapping of pandas.Index objects used for label based indexing""" + if self._indexes is None: + self._indexes = default_indexes(self._coords, self.dims) + return Indexes({k: idx.array for k, idx in self._indexes.items()}) + + @property + def xindexes(self) -> Indexes: + """Mapping of xarray.Index objects used for label based indexing""" if self._indexes is None: self._indexes = default_indexes(self._coords, self.dims) return Indexes(self._indexes) @@ -2182,7 +2189,7 @@ def to_unstacked_dataset(self, dim, level=0): # TODO: benbovy - flexible indexes: update when MultIndex has its own # class inheriting from xarray.Index - idx = self.indexes[dim].array + idx = self.xindexes[dim].array if not isinstance(idx, pd.MultiIndex): raise ValueError(f"'{dim}' is not a stacked coordinate") diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9d92379a440..2af682cb8f2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1171,7 +1171,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, IndexAdapter]) -> "Dataset": return self variables = self._variables.copy() - new_indexes = dict(self.indexes) + new_indexes = dict(self.xindexes) for name, idx in indexes.items(): variables[name] = IndexVariable(name, idx) new_indexes[name] = idx @@ -1318,7 +1318,7 @@ def _level_coords(self) -> Dict[str, Hashable]: coordinate name. 
""" level_coords: Dict[str, Hashable] = {} - for name, index_adapter in self.indexes.items(): + for name, index_adapter in self.xindexes.items(): index = index_adapter # TODO: benbovy - flexible indexes: update when MultIndex has its own xarray class. if isinstance(index.array, pd.MultiIndex): @@ -1362,8 +1362,8 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": if set(self.variables[k].dims) <= needed_dims: variables[k] = self._variables[k] coord_names.add(k) - if k in self.indexes: - indexes[k] = self.indexes[k] + if k in self.xindexes: + indexes[k] = self.xindexes[k] return self._replace(variables, coord_names, dims, indexes=indexes) @@ -1502,7 +1502,7 @@ def __delitem__(self, key: Hashable) -> None: """Remove a variable from this dataset.""" del self._variables[key] self._coord_names.discard(key) - if key in self.indexes: + if key in self.xindexes: assert self._indexes is not None del self._indexes[key] self._dims = calculate_dimensions(self._variables) @@ -1580,6 +1580,13 @@ def identical(self, other: "Dataset") -> bool: @property def indexes(self) -> Indexes: """Mapping of pandas.Index objects used for label based indexing""" + if self._indexes is None: + self._indexes = default_indexes(self._variables, self._dims) + return Indexes({k: idx.array for k, idx in self._indexes.items()}) + + @property + def xindexes(self) -> Indexes: + """Mapping of xarray.Index objects used for label based indexing""" if self._indexes is None: self._indexes = default_indexes(self._variables, self._dims) return Indexes(self._indexes) @@ -2046,7 +2053,7 @@ def _validate_indexers( if v.dtype.kind in "US": # TODO: benbovy - flexible indexes # update when CFTimeIndex has its own xarray index class - index = self.indexes[k].array + index = self.xindexes[k].array if isinstance(index, pd.DatetimeIndex): v = v.astype("datetime64[ns]") elif isinstance(index, xr.CFTimeIndex): @@ -2230,9 +2237,9 @@ def _isel_fancy( if drop and name in var_indexers: continue # drop this 
variable - if name in self.indexes: + if name in self.xindexes: new_var, new_index = isel_variable_and_index( - name, var, self.indexes[name], var_indexers + name, var, self.xindexes[name], var_indexers ) if new_index is not None: indexes[name] = new_index @@ -2791,7 +2798,7 @@ def _reindex( variables, indexes = alignment.reindex_variables( self.variables, self.sizes, - self.indexes, + self.xindexes, indexers, method, tolerance, @@ -2986,7 +2993,7 @@ def _validate_interp_indexer(x, new_x): variables[name] = var coord_names = obj._coord_names & variables.keys() - indexes = {k: v for k, v in obj.indexes.items() if k not in indexers} + indexes = {k: v for k, v in obj.xindexes.items() if k not in indexers} selected = self._replace_with_new_dims( variables.copy(), coord_names, indexes=indexes ) @@ -3092,7 +3099,7 @@ def _rename_indexes(self, name_dict, dims_set): if self._indexes is None: return None indexes = {} - for k, v in self.indexes.items(): + for k, v in self.xindexes.items(): index = v.array new_name = name_dict.get(k, k) if new_name not in dims_set: @@ -3324,8 +3331,8 @@ def swap_dims( dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) if k in result_dims: var = v.to_index_variable() - if k in self.indexes: - indexes[k] = self.indexes[k] + if k in self.xindexes: + indexes[k] = self.xindexes[k] else: new_index = var.to_index() if new_index.nlevels == 1: @@ -3594,12 +3601,12 @@ def reorder_levels( """ dim_order = either_dict_or_kwargs(dim_order, dim_order_kwargs, "reorder_levels") variables = self._variables.copy() - indexes = dict(self.indexes) + indexes = dict(self.xindexes) for dim, order in dim_order.items(): coord = self._variables[dim] # TODO: benbovy - flexible indexes: update when MultiIndex # has its own class inherited from xarray.Index - index = self.indexes[dim].array + index = self.xindexes[dim].array if not isinstance(index, pd.MultiIndex): raise ValueError(f"coordinate {dim} has no MultiIndex") new_index = index.reorder_levels(order) @@ 
-3631,7 +3638,7 @@ def _stack_once(self, dims, new_dim): coord_names = set(self._coord_names) - set(dims) | {new_dim} - indexes = {k: v for k, v in self.indexes.items() if k not in dims} + indexes = {k: v for k, v in self.xindexes.items() if k not in dims} indexes[new_dim] = PandasIndexAdapter(idx) return self._replace_with_new_dims( @@ -3786,7 +3793,7 @@ def ensure_stackable(val): # input a dummy value for the singleton dimension. # TODO: benbovy - flexible indexes: update when MultIndex has its own # class inheriting from xarray.Index - idx = data_array.indexes[new_dim].array + idx = data_array.xindexes[new_dim].array levels = [idx.levels[0]] + [ level.astype(self[level.name].dtype) for level in idx.levels[1:] ] @@ -3803,7 +3810,7 @@ def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": index = remove_unused_levels_categories(index) variables: Dict[Hashable, Variable] = {} - indexes = {k: v for k, v in self.indexes.items() if k != dim} + indexes = {k: v for k, v in self.xindexes.items() if k != dim} for name, var in self.variables.items(): if name != dim: @@ -3848,7 +3855,7 @@ def _unstack_full_reindex( new_dim_sizes = [lev.size for lev in index.levels] variables: Dict[Hashable, Variable] = {} - indexes = {k: v for k, v in self.indexes.items() if k != dim} + indexes = {k: v for k, v in self.xindexes.items() if k != dim} for name, var in obj.variables.items(): if name != dim: @@ -4122,7 +4129,7 @@ def drop_vars( variables = {k: v for k, v in self._variables.items() if k not in names} coord_names = {k for k in self._coord_names if k in variables} - indexes = {k: v for k, v in self.indexes.items() if k not in names} + indexes = {k: v for k, v in self.xindexes.items() if k not in names} return self._replace_with_new_dims( variables, coord_names=coord_names, indexes=indexes ) @@ -4832,7 +4839,7 @@ def reduce( ) coord_names = {k for k in self.coords if k in variables} - indexes = {k: v for k, v in self.indexes.items() if k in variables} + indexes = {k: v for 
k, v in self.xindexes.items() if k in variables} attrs = self.attrs if keep_attrs else None return self._replace_with_new_dims( variables, coord_names=coord_names, attrs=attrs, indexes=indexes @@ -5600,7 +5607,7 @@ def diff(self, dim, n=1, label="upper"): else: variables[name] = var - indexes = dict(self.indexes) + indexes = dict(self.xindexes) if dim in indexes: # TODO: benbovy - flexible indexes: check slicing of xarray indexes? # or only allow this for pandas indexes? @@ -5742,14 +5749,14 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): if roll_coords: indexes = {} - for k, v in self.indexes.items(): + for k, v in self.xindexes.items(): (dim,) = self.variables[k].dims if dim in shifts: indexes[k] = roll_index(v, shifts[dim]) else: indexes[k] = v else: - indexes = dict(self.indexes) + indexes = dict(self.xindexes) return self._replace(variables, indexes=indexes) @@ -5942,7 +5949,7 @@ def quantile( # construct the new dataset coord_names = {k for k in self.coords if k in variables} - indexes = {k: v for k, v in self.indexes.items() if k in variables} + indexes = {k: v for k, v in self.xindexes.items() if k in variables} if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) attrs = self.attrs if keep_attrs else None @@ -6158,7 +6165,7 @@ def _integrate_one(self, coord, datetime_unit=None): variables[k] = Variable(v_dims, integ) else: variables[k] = v - indexes = {k: v for k, v in self.indexes.items() if k in variables} + indexes = {k: v for k, v in self.xindexes.items() if k in variables} return self._replace_with_new_dims( variables, coord_names=coord_names, indexes=indexes ) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 4dabbc2cf1a..1228cd4d3cb 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -257,7 +257,7 @@ def remap_label_indexers(data_obj, indexers, method=None, tolerance=None): dim_indexers = get_dim_indexers(data_obj, indexers) for dim, label in dim_indexers.items(): try: - index = 
data_obj.indexes[dim].array + index = data_obj.xindexes[dim].array except KeyError: # no index for this dimension: reuse the provided labels if method is not None or tolerance is not None: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 3546c4e84a1..52acc96bdb6 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -264,13 +264,13 @@ def append_all(variables, indexes): for mapping in list_of_mappings: if isinstance(mapping, Dataset): - append_all(mapping.variables, mapping.indexes) + append_all(mapping.variables, mapping.xindexes) continue for name, variable in mapping.items(): if isinstance(variable, DataArray): coords = variable._coords.copy() # use private API for speed - indexes = dict(variable.indexes) + indexes = dict(variable.xindexes) # explicitly overwritten variables should take precedence coords.pop(name, None) indexes.pop(name, None) @@ -295,7 +295,7 @@ def collect_from_coordinates( for coords in list_of_coords: variables = coords.variables - indexes = coords.indexes + indexes = coords.xindexes for name, variable in variables.items(): value = grouped.setdefault(name, []) value.append((variable, indexes.get(name))) @@ -961,7 +961,7 @@ def dataset_update_method( # use ds.coords and not ds.indexes, else str coords are cast to object # TODO: benbovy - flexible indexes: fix this (it only works with pandas indexes) indexes = { - key: PandasIndexAdapter(dataset.coords[key]) for key in dataset.indexes.keys() + key: PandasIndexAdapter(dataset.coords[key]) for key in dataset.xindexes.keys() } return merge_core( [dataset, other], diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 92aab0252b1..6068fb47d22 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -320,8 +320,8 @@ def interp_na( # TODO: benbovy - flexible indexes: update when CFTimeIndex (and DatetimeIndex?) 
# has its own class inheriting from xarray.Index if ( - dim in self.indexes - and isinstance(self.indexes[dim].array, (pd.DatetimeIndex, CFTimeIndex)) + dim in self.xindexes + and isinstance(self.xindexes[dim].array, (pd.DatetimeIndex, CFTimeIndex)) and use_coordinate ): # Convert to float diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 500eb6bc128..9b35b0223bd 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -293,7 +293,7 @@ def _wrapper( ) # check that index lengths and values are as expected - for name, index in result.indexes.items(): + for name, index in result.xindexes.items(): if name in expected["shapes"]: if len(index) != expected["shapes"][name]: raise ValueError( @@ -359,27 +359,27 @@ def _wrapper( # check that chunk sizes are compatible input_chunks = dict(npargs[0].chunks) - input_indexes = dict(npargs[0].indexes) + input_indexes = dict(npargs[0].xindexes) for arg in xarray_objs[1:]: assert_chunks_compatible(npargs[0], arg) input_chunks.update(arg.chunks) - input_indexes.update(arg.indexes) + input_indexes.update(arg.xindexes) if template is None: # infer template by providing zero-shaped arrays template = infer_template(func, aligned[0], *args, **kwargs) - template_indexes = set(template.indexes) + template_indexes = set(template.xindexes) preserved_indexes = template_indexes & set(input_indexes) new_indexes = template_indexes - set(input_indexes) indexes = {dim: input_indexes[dim] for dim in preserved_indexes} - indexes.update({k: template.indexes[k] for k in new_indexes}) + indexes.update({k: template.xindexes[k] for k in new_indexes}) output_chunks = { dim: input_chunks[dim] for dim in template.dims if dim in input_chunks } else: # template xarray object has been provided with proper sizes and chunk shapes - indexes = dict(template.indexes) + indexes = dict(template.xindexes) if isinstance(template, DataArray): output_chunks = dict( zip(template.dims, template.chunks) # type: ignore[arg-type] @@ -554,7 
+554,7 @@ def subset_dataset_to_block( hlg.layers[gname_l] = layer result = Dataset(coords=indexes, attrs=template.attrs) - for index in result.indexes: + for index in result.xindexes: result[index].attrs = template[index].attrs result[index].encoding = template[index].encoding diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 275320acd08..476117c6e10 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -696,7 +696,7 @@ def test_concat_cftimeindex(date_type): ) da = xr.concat([da1, da2], dim="time") - assert isinstance(da.indexes["time"].array, CFTimeIndex) + assert isinstance(da.xindexes["time"].array, CFTimeIndex) @requires_cftime diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 2e33ab21a04..b771b8c5197 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -100,7 +100,7 @@ def test_resample(freqs, closed, label, base): .mean() ) # TODO (benbovy - flexible indexes): update when CFTimeIndex is a IndexAdpater subclass - da_cftime["time"] = da_cftime.indexes["time"].array.to_datetimeindex() + da_cftime["time"] = da_cftime.xindexes["time"].array.to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) @@ -147,5 +147,5 @@ def test_calendars(calendar): .mean() ) # TODO (benbovy - flexible indexes): update when CFTimeIndex is a IndexAdpater subclass - da_cftime["time"] = da_cftime.indexes["time"].array.to_datetimeindex() + da_cftime["time"] = da_cftime.xindexes["time"].array.to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 3608a53f747..cd8e3419231 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -280,7 +280,7 @@ def test_decode_cf_with_dask(self): assert all( isinstance(var.data, da.Array) for name, var in decoded.variables.items() - 
if name not in decoded.indexes + if name not in decoded.xindexes ) assert_identical(decoded, conventions.decode_cf(original).compute()) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c6eaffbb040..e776e686a7b 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -24,7 +24,7 @@ from xarray.convert import from_cdms2 from xarray.core import dtypes from xarray.core.common import full_like -from xarray.core.indexes import propagate_indexes +from xarray.core.indexes import IndexAdapter, PandasIndexAdapter, propagate_indexes from xarray.core.utils import is_scalar from xarray.tests import ( LooseVersion, @@ -147,10 +147,17 @@ def test_data_property(self): def test_indexes(self): array = DataArray(np.zeros((2, 3)), [("x", [0, 1]), ("y", ["a", "b", "c"])]) - expected = {"x": pd.Index([0, 1]), "y": pd.Index(["a", "b", "c"])} - assert array.indexes.keys() == expected.keys() - for k in expected: - assert array.indexes[k].equals(expected[k]) + expected_indexes = {"x": pd.Index([0, 1]), "y": pd.Index(["a", "b", "c"])} + expected_xindexes = { + k: PandasIndexAdapter(idx) for k, idx in expected_indexes.items() + } + assert array.xindexes.keys() == expected_xindexes.keys() + assert array.indexes.keys() == expected_indexes.keys() + assert all([isinstance(idx, pd.Index) for idx in array.indexes.values()]) + assert all([isinstance(idx, IndexAdapter) for idx in array.xindexes.values()]) + for k in expected_indexes: + assert array.xindexes[k].equals(expected_xindexes[k]) + assert array.indexes[k].equals(expected_indexes[k]) def test_get_index(self): array = DataArray(np.zeros((2, 3)), coords={"x": ["a", "b"]}, dims=["x", "y"]) @@ -1459,7 +1466,7 @@ def test_coords_alignment(self): def test_set_coords_update_index(self): actual = DataArray([1, 2, 3], [("x", [1, 2, 3])]) actual.coords["x"] = ["a", "b", "c"] - assert actual.indexes["x"].equals(pd.Index(["a", "b", "c"])) + assert actual.xindexes["x"].equals(pd.Index(["a", "b", 
"c"])) def test_coords_replacement_alignment(self): # regression test for GH725 @@ -1479,7 +1486,7 @@ def test_coords_delitem_delete_indexes(self): # regression test for GH3746 arr = DataArray(np.ones((2,)), dims="x", coords={"x": [0, 1]}) del arr.coords["x"] - assert "x" not in arr.indexes + assert "x" not in arr.xindexes def test_broadcast_like(self): arr1 = DataArray( @@ -1627,18 +1634,18 @@ def test_swap_dims(self): expected = DataArray(array.values, {"y": list("abc")}, dims="y") actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) - for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name].array, actual.indexes[dim_name].array + expected.xindexes[dim_name].array, actual.xindexes[dim_name].array ) array = DataArray(np.random.randn(3), {"x": list("abc")}, "x") expected = DataArray(array.values, {"x": ("y", list("abc"))}, dims="y") actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) - for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name], actual.indexes[dim_name] + expected.xindexes[dim_name], actual.xindexes[dim_name] ) # as kwargs @@ -1646,9 +1653,9 @@ def test_swap_dims(self): expected = DataArray(array.values, {"x": ("y", list("abc"))}, dims="y") actual = array.swap_dims(x="y") assert_identical(expected, actual) - for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name], actual.indexes[dim_name] + expected.xindexes[dim_name], actual.xindexes[dim_name] ) # multiindex case @@ -1657,9 +1664,9 @@ def test_swap_dims(self): expected = 
DataArray(array.values, {"y": idx}, "y") actual = array.swap_dims({"x": "y"}) assert_identical(expected, actual) - for dim_name in set().union(expected.indexes.keys(), actual.indexes.keys()): + for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.indexes[dim_name].array, actual.indexes[dim_name].array + expected.xindexes[dim_name].array, actual.xindexes[dim_name].array ) def test_expand_dims_error(self): @@ -4329,12 +4336,12 @@ def test_matmul_align_coords(self): def test_binary_op_propagate_indexes(self): # regression test for GH2227 self.dv["x"] = np.arange(self.dv.sizes["x"]) - expected = self.dv.indexes["x"] + expected = self.dv.xindexes["x"] - actual = (self.dv * 10).indexes["x"] + actual = (self.dv * 10).xindexes["x"] assert expected is actual - actual = (self.dv > 10).indexes["x"] + actual = (self.dv > 10).xindexes["x"] assert expected is actual def test_binary_op_join_setting(self): diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 1cd1d32ff2b..dfb2c95fd83 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -28,6 +28,7 @@ from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like +from xarray.core.indexes import IndexAdapter from xarray.core.pycompat import integer_types from xarray.core.utils import is_scalar @@ -580,9 +581,15 @@ def test_properties(self): assert "numbers" not in ds.data_vars assert len(ds.data_vars) == 3 + assert set(ds.xindexes) == {"dim2", "dim3", "time"} + assert len(ds.xindexes) == 3 + assert "dim2" in repr(ds.xindexes) + assert all([isinstance(idx, IndexAdapter) for idx in ds.xindexes.values()]) + assert set(ds.indexes) == {"dim2", "dim3", "time"} assert len(ds.indexes) == 3 assert "dim2" in repr(ds.indexes) + assert all([isinstance(idx, pd.Index) for idx in ds.indexes.values()]) assert list(ds.coords) == ["time", 
"dim2", "dim3", "numbers"] assert "dim2" in ds.coords @@ -745,12 +752,12 @@ def test_coords_modify(self): # regression test for GH3746 del actual.coords["x"] - assert "x" not in actual.indexes + assert "x" not in actual.xindexes def test_update_index(self): actual = Dataset(coords={"x": [1, 2, 3]}) actual["x"] = ["a", "b", "c"] - assert actual.indexes["x"].equals(pd.Index(["a", "b", "c"])) + assert actual.xindexes["x"].equals(pd.Index(["a", "b", "c"])) def test_coords_setitem_with_new_dimension(self): actual = Dataset() @@ -1042,19 +1049,19 @@ def test_isel(self): assert {"time": 20, "dim2": 9, "dim3": 10} == ret.dims assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) - assert set(data.indexes) == set(ret.indexes) + assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=slice(2), dim1=0, dim2=slice(5)) assert {"time": 2, "dim2": 5, "dim3": 10} == ret.dims assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) - assert set(data.indexes) == set(ret.indexes) + assert set(data.xindexes) == set(ret.xindexes) ret = data.isel(time=0, dim1=0, dim2=slice(5)) assert {"dim2": 5, "dim3": 10} == ret.dims assert set(data.data_vars) == set(ret.data_vars) assert set(data.coords) == set(ret.coords) - assert set(data.indexes) == set(list(ret.indexes) + ["time"]) + assert set(data.xindexes) == set(list(ret.xindexes) + ["time"]) def test_isel_fancy(self): # isel with fancy indexing. 
@@ -2398,7 +2405,7 @@ def test_drop_labels_by_keyword(self): with pytest.warns(FutureWarning): data.drop(arr.coords) with pytest.warns(FutureWarning): - data.drop(arr.indexes) + data.drop(arr.xindexes) assert_array_equal(ds1.coords["x"], ["b"]) assert_array_equal(ds2.coords["x"], ["b"]) @@ -2688,23 +2695,23 @@ def test_rename_does_not_change_CFTimeIndex_type(self): orig = Dataset(coords={"time": time}) renamed = orig.rename(time="time_new") - assert "time_new" in renamed.indexes + assert "time_new" in renamed.xindexes # TODO: benbovy - flexible indexes: update when CFTimeIndex # inherits from xarray.Index - assert isinstance(renamed.indexes["time_new"].array, CFTimeIndex) - assert renamed.indexes["time_new"].array.name == "time_new" + assert isinstance(renamed.xindexes["time_new"].array, CFTimeIndex) + assert renamed.xindexes["time_new"].array.name == "time_new" # check original has not changed - assert "time" in orig.indexes - assert isinstance(orig.indexes["time"].array, CFTimeIndex) - assert orig.indexes["time"].array.name == "time" + assert "time" in orig.xindexes + assert isinstance(orig.xindexes["time"].array, CFTimeIndex) + assert orig.xindexes["time"].array.name == "time" # note: rename_dims(time="time_new") drops "ds.indexes" renamed = orig.rename_dims() - assert isinstance(renamed.indexes["time"].array, CFTimeIndex) + assert isinstance(renamed.xindexes["time"].array, CFTimeIndex) renamed = orig.rename_vars() - assert isinstance(renamed.indexes["time"].array, CFTimeIndex) + assert isinstance(renamed.xindexes["time"].array, CFTimeIndex) def test_rename_does_not_change_DatetimeIndex_type(self): # make sure DatetimeIndex is conderved on rename @@ -2713,23 +2720,23 @@ def test_rename_does_not_change_DatetimeIndex_type(self): orig = Dataset(coords={"time": time}) renamed = orig.rename(time="time_new") - assert "time_new" in renamed.indexes + assert "time_new" in renamed.xindexes # TODO: benbovy - flexible indexes: update when DatetimeIndex # inherits from 
xarray.Index? - assert isinstance(renamed.indexes["time_new"].array, DatetimeIndex) - assert renamed.indexes["time_new"].array.name == "time_new" + assert isinstance(renamed.xindexes["time_new"].array, DatetimeIndex) + assert renamed.xindexes["time_new"].array.name == "time_new" # check original has not changed - assert "time" in orig.indexes - assert isinstance(orig.indexes["time"].array, DatetimeIndex) - assert orig.indexes["time"].array.name == "time" + assert "time" in orig.xindexes + assert isinstance(orig.xindexes["time"].array, DatetimeIndex) + assert orig.xindexes["time"].array.name == "time" # note: rename_dims(time="time_new") drops "ds.indexes" renamed = orig.rename_dims() - assert isinstance(renamed.indexes["time"].array, DatetimeIndex) + assert isinstance(renamed.xindexes["time"].array, DatetimeIndex) renamed = orig.rename_vars() - assert isinstance(renamed.indexes["time"].array, DatetimeIndex) + assert isinstance(renamed.xindexes["time"].array, DatetimeIndex) def test_swap_dims(self): original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42}) @@ -2739,7 +2746,7 @@ def test_swap_dims(self): assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) pd.testing.assert_index_equal( - actual.indexes["y"].array, expected.indexes["y"].array + actual.xindexes["y"].array, expected.xindexes["y"].array ) roundtripped = actual.swap_dims({"y": "x"}) @@ -2772,7 +2779,7 @@ def test_swap_dims(self): assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) pd.testing.assert_index_equal( - actual.indexes["y"].array, expected.indexes["y"].array + actual.xindexes["y"].array, expected.xindexes["y"].array ) def test_expand_dims_error(self): @@ -3152,7 +3159,7 @@ def test_to_stacked_array_dtype_dims(self): y = D.to_stacked_array("features", sample_dims) # TODO: benbovy - flexible indexes: update when MultiIndex has its own class # inherited from xarray.Index - 
assert y.indexes["features"].array.levels[1].dtype == D.y.dtype + assert y.xindexes["features"].array.levels[1].dtype == D.y.dtype assert y.dims == ("x", "features") def test_to_stacked_array_to_unstacked_dataset(self): @@ -5513,8 +5520,8 @@ def test_binary_op_propagate_indexes(self): ds = Dataset( {"d1": DataArray([1, 2, 3], dims=["x"], coords={"x": [10, 20, 30]})} ) - expected = ds.indexes["x"] - actual = (ds * 2).indexes["x"] + expected = ds.xindexes["x"] + actual = (ds * 2).xindexes["x"] assert expected is actual def test_binary_op_join_setting(self): From 89e018a9ec9e16caa8e7ded8b1537ac6953d8f36 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 30 Apr 2021 13:52:49 +0200 Subject: [PATCH 07/20] rename IndexAdapter -> Index --- xarray/core/alignment.py | 8 ++++---- xarray/core/combine.py | 2 +- xarray/core/common.py | 2 +- xarray/core/coordinates.py | 6 +++--- xarray/core/dataarray.py | 4 ++-- xarray/core/dataset.py | 16 ++++++++-------- xarray/core/indexes.py | 14 +++++++------- xarray/core/merge.py | 20 ++++++++++---------- xarray/core/variable.py | 2 +- xarray/testing.py | 4 ++-- xarray/tests/test_cftimeindex_resample.py | 4 ++-- xarray/tests/test_dataarray.py | 4 ++-- xarray/tests/test_dataset.py | 4 ++-- 13 files changed, 45 insertions(+), 45 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 6f185cf78f8..f47bbfaab57 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -18,7 +18,7 @@ import pandas as pd from . import dtypes -from .indexes import IndexAdapter, PandasIndexAdapter +from .indexes import Index, PandasIndexAdapter from .indexing import get_indexer_nd from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str, safe_cast_to_index from .variable import IndexVariable, Variable @@ -303,7 +303,7 @@ def align( for dim, matching_indexes in all_indexes.items(): if dim in indexes: # TODO: benbovy - flexible indexes. 
maybe move this logic in util func - if isinstance(indexes[dim], IndexAdapter): + if isinstance(indexes[dim], Index): index = indexes[dim] else: index = PandasIndexAdapter(safe_cast_to_index(indexes[dim])) @@ -492,14 +492,14 @@ def reindex_like_indexers( def reindex_variables( variables: Mapping[Any, Variable], sizes: Mapping[Any, int], - indexes: Mapping[Any, IndexAdapter], + indexes: Mapping[Any, Index], indexers: Mapping, method: Optional[str] = None, tolerance: Any = None, copy: bool = True, fill_value: Optional[Any] = dtypes.NA, sparse: bool = False, -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, IndexAdapter]]: +) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: """Conform a dictionary of aligned variables onto a new set of variables, filling in missing values with NaN. diff --git a/xarray/core/combine.py b/xarray/core/combine.py index cecf87f97cd..cbdb5154722 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -77,7 +77,7 @@ def _infer_concat_order_from_coords(datasets): ) # TODO (benbovy, flexible indexes): all indexes should be Pandas.Index - # get pd.Index objects from IndexAdapter objects + # get pd.Index objects from Index objects indexes = [index.array for index in indexes] # If dimension coordinate values are same on every dataset then diff --git a/xarray/core/common.py b/xarray/core/common.py index ea67635aea1..6542a35cefb 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1160,7 +1160,7 @@ def resample( category=FutureWarning, ) - # TODO (benbovy - flexible indexes): update when CFTimeIndex is an IndexAdpater subclass + # TODO (benbovy - flexible indexes): update when CFTimeIndex is an xarray Index subclass if isinstance(self.xindexes[dim_name].array, CFTimeIndex): from .resample_cftime import CFTimeGrouper diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index ba7c0c1f4e7..50be8a7f677 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -17,7 +17,7 @@ import 
pandas as pd from . import formatting, indexing -from .indexes import IndexAdapter, Indexes +from .indexes import Index, Indexes from .merge import merge_coordinates_without_align, merge_coords from .utils import Frozen, ReprObject, either_dict_or_kwargs from .variable import Variable @@ -271,7 +271,7 @@ def to_dataset(self) -> "Dataset": return self._data._copy_listed(names) def _update_coords( - self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, IndexAdapter] + self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, Index] ) -> None: from .dataset import calculate_dimensions @@ -334,7 +334,7 @@ def __getitem__(self, key: Hashable) -> "DataArray": return self._data._getitem_coord(key) def _update_coords( - self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, IndexAdapter] + self, coords: Dict[Hashable, Variable], indexes: Mapping[Hashable, Index] ) -> None: from .dataset import calculate_dimensions diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 9437537fbb3..a4e1ef13404 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -52,7 +52,7 @@ from .dataset import Dataset, split_indexes from .formatting import format_item from .indexes import ( - IndexAdapter, + Index, Indexes, PandasIndexAdapter, default_indexes, @@ -351,7 +351,7 @@ class DataArray(AbstractArray, DataWithCoords, DataArrayArithmetic): _cache: Dict[str, Any] _coords: Dict[Any, Variable] _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, IndexAdapter]] + _indexes: Optional[Dict[Hashable, Index]] _name: Optional[Hashable] _variable: Variable diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2af682cb8f2..803cd877301 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -61,7 +61,7 @@ ) from .duck_array_ops import datetime_to_numeric from .indexes import ( - IndexAdapter, + Index, Indexes, PandasIndexAdapter, default_indexes, @@ -694,7 +694,7 @@ class Dataset(DataWithCoords, 
DatasetArithmetic, Mapping): _dims: Dict[Hashable, int] _encoding: Optional[Dict[Hashable, Any]] _close: Optional[Callable[[], None]] - _indexes: Optional[Dict[Hashable, IndexAdapter]] + _indexes: Optional[Dict[Hashable, Index]] _variables: Dict[Hashable, Variable] __slots__ = ( @@ -1089,7 +1089,7 @@ def _replace( coord_names: Set[Hashable] = None, dims: Dict[Any, int] = None, attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Any, IndexAdapter], None, Default] = _default, + indexes: Union[Dict[Any, Index], None, Default] = _default, encoding: Union[dict, None, Default] = _default, inplace: bool = False, ) -> "Dataset": @@ -1138,7 +1138,7 @@ def _replace_with_new_dims( variables: Dict[Hashable, Variable], coord_names: set = None, attrs: Union[Dict[Hashable, Any], None, Default] = _default, - indexes: Union[Dict[Hashable, IndexAdapter], None, Default] = _default, + indexes: Union[Dict[Hashable, Index], None, Default] = _default, inplace: bool = False, ) -> "Dataset": """Replace variables with recalculated dimensions.""" @@ -1166,7 +1166,7 @@ def _replace_vars_and_dims( variables, coord_names, dims, attrs, indexes=None, inplace=inplace ) - def _overwrite_indexes(self, indexes: Mapping[Any, IndexAdapter]) -> "Dataset": + def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": if not indexes: return self @@ -1333,7 +1333,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": """ variables: Dict[Hashable, Variable] = {} coord_names = set() - indexes: Dict[Hashable, IndexAdapter] = {} + indexes: Dict[Hashable, Index] = {} for name in names: try: @@ -2230,7 +2230,7 @@ def _isel_fancy( indexers_list = list(self._validate_indexers(indexers, missing_dims)) variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, IndexAdapter] = {} + indexes: Dict[Hashable, Index] = {} for name, var in self.variables.items(): var_indexers = {k: v for k, v in indexers_list if k in var.dims} @@ -3326,7 +3326,7 @@ def 
swap_dims( coord_names.update({dim for dim in dims_dict.values() if dim in self.variables}) variables: Dict[Hashable, Variable] = {} - indexes: Dict[Hashable, IndexAdapter] = {} + indexes: Dict[Hashable, Index] = {} for k, v in self.variables.items(): dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) if k in result_dims: diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 09776748f07..fbd7246591d 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -25,7 +25,7 @@ from .variable import Variable -class IndexAdapter: +class Index: """Base class inherited by all xarray-compatible indexes.""" __slots__ = "coord_names" @@ -57,7 +57,7 @@ def intersection(self, other): # pragma: no cover raise NotImplementedError() -class PandasIndexAdapter(IndexAdapter, ExplicitlyIndexedNDArrayMixin): +class PandasIndexAdapter(Index, ExplicitlyIndexedNDArrayMixin): """Wrap a pandas.Index to preserve dtypes and handle explicit indexing.""" __slots__ = ("array", "_dtype") @@ -256,7 +256,7 @@ def __repr__(self): def default_indexes( coords: Mapping[Any, "Variable"], dims: Iterable -) -> Dict[Hashable, IndexAdapter]: +) -> Dict[Hashable, Index]: """Default indexes for a Dataset/DataArray. 
Parameters @@ -277,9 +277,9 @@ def default_indexes( def isel_variable_and_index( name: Hashable, variable: "Variable", - index: IndexAdapter, + index: Index, indexers: Mapping[Hashable, Union[int, slice, np.ndarray, "Variable"]], -) -> Tuple["Variable", Optional[IndexAdapter]]: +) -> Tuple["Variable", Optional[Index]]: """Index a Variable and pandas.Index together.""" from .variable import Variable @@ -321,8 +321,8 @@ def roll_index( def propagate_indexes( - indexes: Optional[Dict[Hashable, IndexAdapter]], exclude: Optional[Any] = None -) -> Optional[Dict[Hashable, IndexAdapter]]: + indexes: Optional[Dict[Hashable, Index]], exclude: Optional[Any] = None +) -> Optional[Dict[Hashable, Index]]: """Creates new indexes dict from existing dict optionally excluding some dimensions.""" if exclude is None: exclude = () diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 52acc96bdb6..6dbdec24674 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .indexes import IndexAdapter, PandasIndexAdapter +from .indexes import Index, PandasIndexAdapter from .utils import Frozen, compat_dict_union, dict_equiv, equivalent from .variable import Variable, as_variable, assert_unique_multiindex_level_names @@ -158,14 +158,14 @@ def _assert_compat_valid(compat): ) -MergeElement = Tuple[Variable, Optional[IndexAdapter]] +MergeElement = Tuple[Variable, Optional[Index]] def merge_collected( grouped: Dict[Hashable, List[MergeElement]], prioritized: Mapping[Hashable, MergeElement] = None, compat: str = "minimal", -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, IndexAdapter]]: +) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: """Merge dicts of variables, while resolving conflicts appropriately. 
Parameters @@ -187,7 +187,7 @@ def merge_collected( _assert_compat_valid(compat) merged_vars: Dict[Hashable, Variable] = {} - merged_indexes: Dict[Hashable, IndexAdapter] = {} + merged_indexes: Dict[Hashable, Index] = {} for name, elements_list in grouped.items(): if name in prioritized: @@ -252,7 +252,7 @@ def collect_variables_and_indexes( from .dataarray import DataArray from .dataset import Dataset - grouped: Dict[Hashable, List[Tuple[Variable, Optional[IndexAdapter]]]] = {} + grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {} def append(name, variable, index): values = grouped.setdefault(name, []) @@ -291,7 +291,7 @@ def collect_from_coordinates( list_of_coords: "List[Coordinates]", ) -> Dict[Hashable, List[MergeElement]]: """Collect variables and indexes to be merged from Coordinate objects.""" - grouped: Dict[Hashable, List[Tuple[Variable, Optional[IndexAdapter]]]] = {} + grouped: Dict[Hashable, List[Tuple[Variable, Optional[Index]]]] = {} for coords in list_of_coords: variables = coords.variables @@ -306,7 +306,7 @@ def merge_coordinates_without_align( objects: "List[Coordinates]", prioritized: Mapping[Hashable, MergeElement] = None, exclude_dims: AbstractSet = frozenset(), -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, IndexAdapter]]: +) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: """Merge variables/indexes from coordinates without automatic alignments. This function is used for merging coordinate from pre-existing xarray @@ -439,9 +439,9 @@ def merge_coords( compat: str = "minimal", join: str = "outer", priority_arg: Optional[int] = None, - indexes: Optional[Mapping[Hashable, IndexAdapter]] = None, + indexes: Optional[Mapping[Hashable, Index]] = None, fill_value: object = dtypes.NA, -) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, IndexAdapter]]: +) -> Tuple[Dict[Hashable, Variable], Dict[Hashable, Index]]: """Merge coordinate variables. See merge_core below for argument descriptions. 
This works similarly to @@ -560,7 +560,7 @@ def merge_core( combine_attrs: Optional[str] = "override", priority_arg: Optional[int] = None, explicit_coords: Optional[Sequence] = None, - indexes: Optional[Mapping[Hashable, IndexAdapter]] = None, + indexes: Optional[Mapping[Hashable, Index]] = None, fill_value: object = dtypes.NA, ) -> _MergeResult: """Core logic for merging labeled objects. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 06bca4e999f..e2892bea150 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -553,7 +553,7 @@ def to_index_variable(self): def _to_index_adpater(self): # temporary function used internally as a replacement of to_index() - # returns an IndexAdpater instance instead of a pd.Index instance + # returns an xarray Index instance instead of a pd.Index instance return PandasIndexAdapter(self.to_index()) def to_index(self): diff --git a/xarray/testing.py b/xarray/testing.py index 6095d9ece46..40ca12852b9 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -8,7 +8,7 @@ from xarray.core import duck_array_ops, formatting, utils from xarray.core.dataarray import DataArray from xarray.core.dataset import Dataset -from xarray.core.indexes import IndexAdapter, default_indexes +from xarray.core.indexes import Index, default_indexes from xarray.core.variable import IndexVariable, Variable __all__ = ( @@ -253,7 +253,7 @@ def assert_chunks_equal(a, b): def _assert_indexes_invariants_checks(indexes, possible_coord_variables, dims): assert isinstance(indexes, dict), indexes - assert all(isinstance(v, IndexAdapter) for v in indexes.values()), { + assert all(isinstance(v, Index) for v in indexes.values()), { k: type(v) for k, v in indexes.items() } diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index b771b8c5197..04a3e8d5144 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -99,7 +99,7 @@ def 
test_resample(freqs, closed, label, base): ) .mean() ) - # TODO (benbovy - flexible indexes): update when CFTimeIndex is a IndexAdpater subclass + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass da_cftime["time"] = da_cftime.xindexes["time"].array.to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) @@ -146,6 +146,6 @@ def test_calendars(calendar): .resample(time=freq, closed=closed, label=label, base=base, loffset=loffset) .mean() ) - # TODO (benbovy - flexible indexes): update when CFTimeIndex is a IndexAdpater subclass + # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass da_cftime["time"] = da_cftime.xindexes["time"].array.to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index e776e686a7b..66e9d93aba2 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -24,7 +24,7 @@ from xarray.convert import from_cdms2 from xarray.core import dtypes from xarray.core.common import full_like -from xarray.core.indexes import IndexAdapter, PandasIndexAdapter, propagate_indexes +from xarray.core.indexes import Index, PandasIndexAdapter, propagate_indexes from xarray.core.utils import is_scalar from xarray.tests import ( LooseVersion, @@ -154,7 +154,7 @@ def test_indexes(self): assert array.xindexes.keys() == expected_xindexes.keys() assert array.indexes.keys() == expected_indexes.keys() assert all([isinstance(idx, pd.Index) for idx in array.indexes.values()]) - assert all([isinstance(idx, IndexAdapter) for idx in array.xindexes.values()]) + assert all([isinstance(idx, Index) for idx in array.xindexes.values()]) for k in expected_indexes: assert array.xindexes[k].equals(expected_xindexes[k]) assert array.indexes[k].equals(expected_indexes[k]) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index dfb2c95fd83..d54e28493ad 100644 --- 
a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -28,7 +28,7 @@ from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import dtypes, indexing, utils from xarray.core.common import duck_array_ops, full_like -from xarray.core.indexes import IndexAdapter +from xarray.core.indexes import Index from xarray.core.pycompat import integer_types from xarray.core.utils import is_scalar @@ -584,7 +584,7 @@ def test_properties(self): assert set(ds.xindexes) == {"dim2", "dim3", "time"} assert len(ds.xindexes) == 3 assert "dim2" in repr(ds.xindexes) - assert all([isinstance(idx, IndexAdapter) for idx in ds.xindexes.values()]) + assert all([isinstance(idx, Index) for idx in ds.xindexes.values()]) assert set(ds.indexes) == {"dim2", "dim3", "time"} assert len(ds.indexes) == 3 From c8a5dd8eccd9edff3849683f06b719a7caf0a33e Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 30 Apr 2021 14:03:06 +0200 Subject: [PATCH 08/20] rename _to_index_adpater (typo) -> _to_xindex --- xarray/core/dataset.py | 6 +++--- xarray/core/indexes.py | 2 +- xarray/core/merge.py | 4 ++-- xarray/core/variable.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 803cd877301..cc9ba92061a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1346,7 +1346,7 @@ def _copy_listed(self, names: Iterable[Hashable]) -> "Dataset": if ref_name in self._coord_names or ref_name in self.dims: coord_names.add(var_name) if (var_name,) == var.dims: - indexes[var_name] = var._to_index_adpater() + indexes[var_name] = var._to_xindex() needed_dims: Set[Hashable] = set() for v in variables.values(): @@ -2202,7 +2202,7 @@ def isel( continue if indexes and var_name in indexes: if var_value.ndim == 1: - indexes[var_name] = var_value._to_index_adpater() + indexes[var_name] = var_value._to_xindex() else: del indexes[var_name] variables[var_name] = var_value @@ -3003,7 +3003,7 @@ def _validate_interp_indexer(x, 
new_x): for k, v in indexers.items(): assert isinstance(v, Variable) if v.dims == (k,): - indexes[k] = v._to_index_adpater() + indexes[k] = v._to_xindex() # Extract coordinates from indexers coord_vars, new_indexes = selected._get_indexers_coords_and_indexes(coords) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index fbd7246591d..6e4753b3fe7 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -271,7 +271,7 @@ def default_indexes( Mapping from indexing keys (levels/dimension names) to indexes used for indexing along that dimension. """ - return {key: coords[key]._to_index_adpater() for key in dims if key in coords} + return {key: coords[key]._to_xindex() for key in dims if key in coords} def isel_variable_and_index( diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 6dbdec24674..f8bcc2baf53 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -279,7 +279,7 @@ def append_all(variables, indexes): variable = as_variable(variable, name=name) if variable.dims == (name,): variable = variable.to_index_variable() - index = variable._to_index_adpater() + index = variable._to_xindex() else: index = None append(name, variable, index) @@ -475,7 +475,7 @@ def _extract_indexes_from_coords(coords): for name, variable in coords.items(): variable = as_variable(variable, name=name) if variable.dims == (name,): - yield name, variable._to_index_adpater() + yield name, variable._to_xindex() def assert_valid_explicit_coords(variables, dims, explicit_coords): diff --git a/xarray/core/variable.py b/xarray/core/variable.py index e2892bea150..f972b99f95b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -551,7 +551,7 @@ def to_index_variable(self): to_coord = utils.alias(to_index_variable, "to_coord") - def _to_index_adpater(self): + def _to_xindex(self): # temporary function used internally as a replacement of to_index() # returns an xarray Index instance instead of a pd.Index instance return 
PandasIndexAdapter(self.to_index()) From c492e3eb5c4ff904de069e569f9c696f447b135c Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 30 Apr 2021 16:23:06 +0200 Subject: [PATCH 09/20] add Index.to_pandas_index() method Also improve xarray_obj.indexes property implementation --- xarray/core/alignment.py | 2 +- xarray/core/common.py | 4 +-- xarray/core/dataarray.py | 20 ++++++++----- xarray/core/dataset.py | 35 +++++++++++++---------- xarray/core/indexes.py | 23 ++++++++++----- xarray/core/indexing.py | 2 +- xarray/core/missing.py | 4 ++- xarray/core/parallel.py | 2 +- xarray/tests/test_cftimeindex.py | 2 +- xarray/tests/test_cftimeindex_resample.py | 6 ++-- xarray/tests/test_dataset.py | 32 +++++++++++---------- 11 files changed, 79 insertions(+), 53 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index f47bbfaab57..2e3de35fe93 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -570,7 +570,7 @@ def reindex_variables( if dim in indexes: # TODO (benbovy - flexible indexes): support other indexes than pd.Index? 
- index = indexes[dim].array + index = indexes[dim].to_pandas_index() if not index.is_unique: raise ValueError( diff --git a/xarray/core/common.py b/xarray/core/common.py index 6542a35cefb..819a72229a2 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -406,7 +406,7 @@ def get_index(self, key: Hashable) -> pd.Index: raise KeyError(key) try: - return self.xindexes[key].array + return self.xindexes[key].to_pandas_index() except KeyError: return pd.Index(range(self.sizes[key]), name=key) @@ -1161,7 +1161,7 @@ def resample( ) # TODO (benbovy - flexible indexes): update when CFTimeIndex is an xarray Index subclass - if isinstance(self.xindexes[dim_name].array, CFTimeIndex): + if isinstance(self.xindexes[dim_name].to_pandas_index(), CFTimeIndex): from .resample_cftime import CFTimeGrouper grouper = CFTimeGrouper(freq, closed, label, base, loffset) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index a4e1ef13404..1a3881fb586 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -780,14 +780,18 @@ def encoding(self, value: Mapping[Hashable, Any]) -> None: @property def indexes(self) -> Indexes: - """Mapping of pandas.Index objects used for label based indexing""" - if self._indexes is None: - self._indexes = default_indexes(self._coords, self.dims) - return Indexes({k: idx.array for k, idx in self._indexes.items()}) + """Mapping of xarray Index or pandas.Index objects used for label based indexing.""" + xr_or_pd_indexes = {} + for k, idx in self.xindexes.items(): + try: + xr_or_pd_indexes[k] = idx.to_pandas_index() + except TypeError: + xr_or_pd_indexes[k] = idx + return Indexes(xr_or_pd_indexes) @property def xindexes(self) -> Indexes: - """Mapping of xarray.Index objects used for label based indexing""" + """Mapping of xarray Index objects used for label based indexing.""" if self._indexes is None: self._indexes = default_indexes(self._coords, self.dims) return Indexes(self._indexes) @@ -1005,8 +1009,10 @@ def copy(self, 
deep: bool = True, data: Any = None) -> "DataArray": if self._indexes is None: indexes = self._indexes else: + # TODO: benbovy: flexible indexes: support all xarray indexes (not just pandas.Index) + # xarray Index needs a copy method. indexes = { - k: PandasIndexAdapter(v.array.copy(deep=deep)) + k: PandasIndexAdapter(v.to_pandas_index().copy(deep=deep)) for k, v in self._indexes.items() } return self._replace(variable, coords, indexes=indexes) @@ -2189,7 +2195,7 @@ def to_unstacked_dataset(self, dim, level=0): # TODO: benbovy - flexible indexes: update when MultIndex has its own # class inheriting from xarray.Index - idx = self.xindexes[dim].array + idx = self.xindexes[dim].to_pandas_index() if not isinstance(idx, pd.MultiIndex): raise ValueError(f"'{dim}' is not a stacked coordinate") diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index cc9ba92061a..fc5393f64f8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1180,7 +1180,7 @@ def _overwrite_indexes(self, indexes: Mapping[Any, Index]) -> "Dataset": # switch from dimension to level names, if necessary dim_names: Dict[Hashable, str] = {} for dim, idx in indexes.items(): - pd_idx = idx.array + pd_idx = idx.to_pandas_index() if not isinstance(pd_idx, pd.MultiIndex) and pd_idx.name != dim: dim_names[dim] = pd_idx.name if dim_names: @@ -1318,11 +1318,11 @@ def _level_coords(self) -> Dict[str, Hashable]: coordinate name. """ level_coords: Dict[str, Hashable] = {} - for name, index_adapter in self.xindexes.items(): - index = index_adapter + for name, index in self.xindexes.items(): # TODO: benbovy - flexible indexes: update when MultIndex has its own xarray class. 
- if isinstance(index.array, pd.MultiIndex): - level_names = index.array.names + pd_index = index.to_pandas_index() + if isinstance(pd_index, pd.MultiIndex): + level_names = pd_index.names (dim,) = self.variables[name].dims level_coords.update({lname: dim for lname in level_names}) return level_coords @@ -1579,14 +1579,18 @@ def identical(self, other: "Dataset") -> bool: @property def indexes(self) -> Indexes: - """Mapping of pandas.Index objects used for label based indexing""" - if self._indexes is None: - self._indexes = default_indexes(self._variables, self._dims) - return Indexes({k: idx.array for k, idx in self._indexes.items()}) + """Mapping of xarray Index or pandas.Index objects used for label based indexing.""" + xr_or_pd_indexes = {} + for k, idx in self.xindexes.items(): + try: + xr_or_pd_indexes[k] = idx.to_pandas_index() + except TypeError: + xr_or_pd_indexes[k] = idx + return Indexes(xr_or_pd_indexes) @property def xindexes(self) -> Indexes: - """Mapping of xarray.Index objects used for label based indexing""" + """Mapping of xarray Index objects used for label based indexing.""" if self._indexes is None: self._indexes = default_indexes(self._variables, self._dims) return Indexes(self._indexes) @@ -2053,7 +2057,7 @@ def _validate_indexers( if v.dtype.kind in "US": # TODO: benbovy - flexible indexes # update when CFTimeIndex has its own xarray index class - index = self.xindexes[k].array + index = self.xindexes[k].to_pandas_index() if isinstance(index, pd.DatetimeIndex): v = v.astype("datetime64[ns]") elif isinstance(index, xr.CFTimeIndex): @@ -3100,7 +3104,8 @@ def _rename_indexes(self, name_dict, dims_set): return None indexes = {} for k, v in self.xindexes.items(): - index = v.array + # TODO: benbovy - flexible indexes: make it compatible with any xarray Index + index = v.to_pandas_index() new_name = name_dict.get(k, k) if new_name not in dims_set: continue @@ -3606,7 +3611,7 @@ def reorder_levels( coord = self._variables[dim] # TODO: benbovy - 
flexible indexes: update when MultiIndex # has its own class inherited from xarray.Index - index = self.xindexes[dim].array + index = self.xindexes[dim].to_pandas_index() if not isinstance(index, pd.MultiIndex): raise ValueError(f"coordinate {dim} has no MultiIndex") new_index = index.reorder_levels(order) @@ -3793,7 +3798,7 @@ def ensure_stackable(val): # input a dummy value for the singleton dimension. # TODO: benbovy - flexible indexes: update when MultIndex has its own # class inheriting from xarray.Index - idx = data_array.xindexes[new_dim].array + idx = data_array.xindexes[new_dim].to_pandas_index() levels = [idx.levels[0]] + [ level.astype(self[level.name].dtype) for level in idx.levels[1:] ] @@ -5611,7 +5616,7 @@ def diff(self, dim, n=1, label="upper"): if dim in indexes: # TODO: benbovy - flexible indexes: check slicing of xarray indexes? # or only allow this for pandas indexes? - index = indexes[dim].array + index = indexes[dim].to_pandas_index() indexes[dim] = PandasIndexAdapter(index[kwargs_new[dim]]) difference = self._replace_with_new_dims(variables, indexes=indexes) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 6e4753b3fe7..fa2e51d004d 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -36,17 +36,22 @@ def __init__(self, coord_names: Union[Hashable, Iterable[Hashable]]): else: self.coord_names = tuple([coord_names]) - # TODO (benbovy - flexible indexes): remove - # temporarly avoid mypy errors: the `array` attribute is used in many places - # to access the underlying pandas.Index objects from xarray_obj.indexes - self.array = pd.Index([]) - @classmethod def from_variables( cls, variables: Dict[Hashable, "Variable"], **kwargs ): # pragma: no cover raise NotImplementedError() + def to_pandas_index(self) -> pd.Index: + """Cast this xarray index to a pandas.Index object or raise a TypeError + if this is not supported. + + This method is used by all xarray operations that expect/require a + pandas.Index object. 
+ + """ + raise TypeError(f"{type(self)} cannot be cast to a pandas.Index object.") + def equals(self, other): # pragma: no cover raise NotImplementedError() @@ -93,6 +98,9 @@ def from_variables(cls, variables: Dict[Hashable, "Variable"], **kwargs): varname, var = list(variables.items())[0] return cls(var.data, dtype=var.dtype, coord_name=varname) + def to_pandas_index(self) -> pd.Index: + return self.array + @property def dtype(self) -> np.dtype: return self._dtype @@ -303,7 +311,8 @@ def isel_variable_and_index( indexer = indexers[dim] if isinstance(indexer, Variable): indexer = indexer.data - new_index = PandasIndexAdapter(index.array[indexer]) + pd_index = index.to_pandas_index() + new_index = PandasIndexAdapter(pd_index[indexer]) return new_variable, new_index @@ -311,7 +320,7 @@ def roll_index( index: PandasIndexAdapter, count: int, axis: int = 0 ) -> PandasIndexAdapter: """Roll an pandas.Index.""" - pd_index = index.array + pd_index = index.to_pandas_index() count %= pd_index.shape[0] if count != 0: new_idx = pd_index[-count:].append(pd_index[:-count]) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 1228cd4d3cb..b463d9ed583 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -257,7 +257,7 @@ def remap_label_indexers(data_obj, indexers, method=None, tolerance=None): dim_indexers = get_dim_indexers(data_obj, indexers) for dim, label in dim_indexers.items(): try: - index = data_obj.xindexes[dim].array + index = data_obj.xindexes[dim].to_pandas_index() except KeyError: # no index for this dimension: reuse the provided labels if method is not None or tolerance is not None: diff --git a/xarray/core/missing.py b/xarray/core/missing.py index 6068fb47d22..9dcd1ce5b6c 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -321,7 +321,9 @@ def interp_na( # has its own class inheriting from xarray.Index if ( dim in self.xindexes - and isinstance(self.xindexes[dim].array, (pd.DatetimeIndex, CFTimeIndex)) + and 
isinstance( + self.xindexes[dim].to_pandas_index(), (pd.DatetimeIndex, CFTimeIndex) + ) and use_coordinate ): # Convert to float diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 9b35b0223bd..d1872ea0ff5 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -508,7 +508,7 @@ def subset_dataset_to_block( # all indexes expected_indexes = {} for dim in indexes: - idx = indexes[dim].array[ + idx = indexes[dim].to_pandas_index()[ _get_chunk_slicer(dim, chunk_index, output_chunk_bounds) ] expected_indexes[dim] = PandasIndexAdapter(idx) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index 476117c6e10..2cd8c003f52 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -696,7 +696,7 @@ def test_concat_cftimeindex(date_type): ) da = xr.concat([da1, da2], dim="time") - assert isinstance(da.xindexes["time"].array, CFTimeIndex) + assert isinstance(da.xindexes["time"].to_pandas_index(), CFTimeIndex) @requires_cftime diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index 04a3e8d5144..526f3fc30c1 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -100,7 +100,9 @@ def test_resample(freqs, closed, label, base): .mean() ) # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass - da_cftime["time"] = da_cftime.xindexes["time"].array.to_datetimeindex() + da_cftime["time"] = ( + da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() + ) xr.testing.assert_identical(da_cftime, da_datetime) @@ -147,5 +149,5 @@ def test_calendars(calendar): .mean() ) # TODO (benbovy - flexible indexes): update when CFTimeIndex is a xarray Index subclass - da_cftime["time"] = da_cftime.xindexes["time"].array.to_datetimeindex() + da_cftime["time"] = da_cftime.xindexes["time"].to_pandas_index().to_datetimeindex() xr.testing.assert_identical(da_cftime, da_datetime) diff 
--git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index d54e28493ad..fd392818420 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2698,20 +2698,20 @@ def test_rename_does_not_change_CFTimeIndex_type(self): assert "time_new" in renamed.xindexes # TODO: benbovy - flexible indexes: update when CFTimeIndex # inherits from xarray.Index - assert isinstance(renamed.xindexes["time_new"].array, CFTimeIndex) - assert renamed.xindexes["time_new"].array.name == "time_new" + assert isinstance(renamed.xindexes["time_new"].to_pandas_index(), CFTimeIndex) + assert renamed.xindexes["time_new"].to_pandas_index().name == "time_new" # check original has not changed assert "time" in orig.xindexes - assert isinstance(orig.xindexes["time"].array, CFTimeIndex) - assert orig.xindexes["time"].array.name == "time" + assert isinstance(orig.xindexes["time"].to_pandas_index(), CFTimeIndex) + assert orig.xindexes["time"].to_pandas_index().name == "time" # note: rename_dims(time="time_new") drops "ds.indexes" renamed = orig.rename_dims() - assert isinstance(renamed.xindexes["time"].array, CFTimeIndex) + assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex) renamed = orig.rename_vars() - assert isinstance(renamed.xindexes["time"].array, CFTimeIndex) + assert isinstance(renamed.xindexes["time"].to_pandas_index(), CFTimeIndex) def test_rename_does_not_change_DatetimeIndex_type(self): # make sure DatetimeIndex is conderved on rename @@ -2723,20 +2723,20 @@ def test_rename_does_not_change_DatetimeIndex_type(self): assert "time_new" in renamed.xindexes # TODO: benbovy - flexible indexes: update when DatetimeIndex # inherits from xarray.Index? 
- assert isinstance(renamed.xindexes["time_new"].array, DatetimeIndex) - assert renamed.xindexes["time_new"].array.name == "time_new" + assert isinstance(renamed.xindexes["time_new"].to_pandas_index(), DatetimeIndex) + assert renamed.xindexes["time_new"].to_pandas_index().name == "time_new" # check original has not changed assert "time" in orig.xindexes - assert isinstance(orig.xindexes["time"].array, DatetimeIndex) - assert orig.xindexes["time"].array.name == "time" + assert isinstance(orig.xindexes["time"].to_pandas_index(), DatetimeIndex) + assert orig.xindexes["time"].to_pandas_index().name == "time" # note: rename_dims(time="time_new") drops "ds.indexes" renamed = orig.rename_dims() - assert isinstance(renamed.xindexes["time"].array, DatetimeIndex) + assert isinstance(renamed.xindexes["time"].to_pandas_index(), DatetimeIndex) renamed = orig.rename_vars() - assert isinstance(renamed.xindexes["time"].array, DatetimeIndex) + assert isinstance(renamed.xindexes["time"].to_pandas_index(), DatetimeIndex) def test_swap_dims(self): original = Dataset({"x": [1, 2, 3], "y": ("x", list("abc")), "z": 42}) @@ -2746,7 +2746,8 @@ def test_swap_dims(self): assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) pd.testing.assert_index_equal( - actual.xindexes["y"].array, expected.xindexes["y"].array + actual.xindexes["y"].to_pandas_index(), + expected.xindexes["y"].to_pandas_index(), ) roundtripped = actual.swap_dims({"y": "x"}) @@ -2779,7 +2780,8 @@ def test_swap_dims(self): assert isinstance(actual.variables["y"], IndexVariable) assert isinstance(actual.variables["x"], Variable) pd.testing.assert_index_equal( - actual.xindexes["y"].array, expected.xindexes["y"].array + actual.xindexes["y"].to_pandas_index(), + expected.xindexes["y"].to_pandas_index(), ) def test_expand_dims_error(self): @@ -3159,7 +3161,7 @@ def test_to_stacked_array_dtype_dims(self): y = D.to_stacked_array("features", sample_dims) # TODO: benbovy - flexible 
indexes: update when MultiIndex has its own class # inherited from xarray.Index - assert y.xindexes["features"].array.levels[1].dtype == D.y.dtype + assert y.xindexes["features"].to_pandas_index().levels[1].dtype == D.y.dtype assert y.dims == ("x", "features") def test_to_stacked_array_to_unstacked_dataset(self): From 39e78420821a681914578f654956bd61b2b338ca Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 May 2021 17:33:07 +0200 Subject: [PATCH 10/20] rename PandasIndexAdapter -> PandasIndex --- xarray/core/alignment.py | 8 +++----- xarray/core/dataarray.py | 10 ++-------- xarray/core/dataset.py | 16 ++++++++-------- xarray/core/indexes.py | 18 ++++++++---------- xarray/core/indexing.py | 8 ++++---- xarray/core/merge.py | 6 ++---- xarray/core/parallel.py | 4 ++-- xarray/core/variable.py | 16 ++++++++-------- xarray/tests/test_backends.py | 2 +- xarray/tests/test_dataarray.py | 6 ++---- xarray/tests/test_variable.py | 8 ++++---- 11 files changed, 44 insertions(+), 58 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 2e3de35fe93..a373fdf3080 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -18,7 +18,7 @@ import pandas as pd from . 
import dtypes -from .indexes import Index, PandasIndexAdapter +from .indexes import Index, PandasIndex from .indexing import get_indexer_nd from .utils import is_dict_like, is_full_slice, maybe_coerce_to_str, safe_cast_to_index from .variable import IndexVariable, Variable @@ -306,7 +306,7 @@ def align( if isinstance(indexes[dim], Index): index = indexes[dim] else: - index = PandasIndexAdapter(safe_cast_to_index(indexes[dim])) + index = PandasIndex(safe_cast_to_index(indexes[dim])) if ( any(not index.equals(other) for other in matching_indexes) or dim in unlabeled_dim_sizes @@ -564,9 +564,7 @@ def reindex_variables( "from that to be indexed along {:s}".format(str(indexer.dims), dim) ) - target = new_indexes[dim] = PandasIndexAdapter( - safe_cast_to_index(indexers[dim]) - ) + target = new_indexes[dim] = PandasIndex(safe_cast_to_index(indexers[dim])) if dim in indexes: # TODO (benbovy - flexible indexes): support other indexes than pd.Index? diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1a3881fb586..1cf38960700 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -51,13 +51,7 @@ ) from .dataset import Dataset, split_indexes from .formatting import format_item -from .indexes import ( - Index, - Indexes, - PandasIndexAdapter, - default_indexes, - propagate_indexes, -) +from .indexes import Index, Indexes, PandasIndex, default_indexes, propagate_indexes from .indexing import is_fancy_indexer from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords from .options import OPTIONS, _get_keep_attrs @@ -1012,7 +1006,7 @@ def copy(self, deep: bool = True, data: Any = None) -> "DataArray": # TODO: benbovy: flexible indexes: support all xarray indexes (not just pandas.Index) # xarray Index needs a copy method. 
indexes = { - k: PandasIndexAdapter(v.to_pandas_index().copy(deep=deep)) + k: PandasIndex(v.to_pandas_index().copy(deep=deep)) for k, v in self._indexes.items() } return self._replace(variable, coords, indexes=indexes) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fc5393f64f8..2da01c68e22 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -63,7 +63,7 @@ from .indexes import ( Index, Indexes, - PandasIndexAdapter, + PandasIndex, default_indexes, isel_variable_and_index, propagate_indexes, @@ -3114,7 +3114,7 @@ def _rename_indexes(self, name_dict, dims_set): new_index = index.rename(names=new_names) else: new_index = index.rename(new_name) - indexes[new_name] = PandasIndexAdapter(new_index) + indexes[new_name] = PandasIndex(new_index) return indexes def _rename_all(self, name_dict, dims_dict): @@ -3343,7 +3343,7 @@ def swap_dims( if new_index.nlevels == 1: # make sure index name matches dimension name new_index = new_index.rename(k) - indexes[k] = PandasIndexAdapter(new_index) + indexes[k] = PandasIndex(new_index) else: var = v.to_base_variable() var.dims = dims @@ -3616,7 +3616,7 @@ def reorder_levels( raise ValueError(f"coordinate {dim} has no MultiIndex") new_index = index.reorder_levels(order) variables[dim] = IndexVariable(coord.dims, new_index) - indexes[dim] = PandasIndexAdapter(new_index) + indexes[dim] = PandasIndex(new_index) return self._replace(variables, indexes=indexes) @@ -3644,7 +3644,7 @@ def _stack_once(self, dims, new_dim): coord_names = set(self._coord_names) - set(dims) | {new_dim} indexes = {k: v for k, v in self.xindexes.items() if k not in dims} - indexes[new_dim] = PandasIndexAdapter(idx) + indexes[new_dim] = PandasIndex(idx) return self._replace_with_new_dims( variables, coord_names=coord_names, indexes=indexes @@ -3833,7 +3833,7 @@ def _unstack_once(self, dim: Hashable, fill_value) -> "Dataset": for name, lev in zip(index.names, index.levels): variables[name] = IndexVariable(name, lev) - indexes[name] = 
PandasIndexAdapter(lev) + indexes[name] = PandasIndex(lev) coord_names = set(self._coord_names) - {dim} | set(index.names) @@ -3872,7 +3872,7 @@ def _unstack_full_reindex( for name, lev in zip(new_dim_names, index.levels): variables[name] = IndexVariable(name, lev) - indexes[name] = PandasIndexAdapter(lev) + indexes[name] = PandasIndex(lev) coord_names = set(self._coord_names) - {dim} | set(new_dim_names) @@ -5617,7 +5617,7 @@ def diff(self, dim, n=1, label="upper"): # TODO: benbovy - flexible indexes: check slicing of xarray indexes? # or only allow this for pandas indexes? index = indexes[dim].to_pandas_index() - indexes[dim] = PandasIndexAdapter(index[kwargs_new[dim]]) + indexes[dim] = PandasIndex(index[kwargs_new[dim]]) difference = self._replace_with_new_dims(variables, indexes=indexes) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index fa2e51d004d..9f15f7d7f00 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -62,7 +62,7 @@ def intersection(self, other): # pragma: no cover raise NotImplementedError() -class PandasIndexAdapter(Index, ExplicitlyIndexedNDArrayMixin): +class PandasIndex(Index, ExplicitlyIndexedNDArrayMixin): """Wrap a pandas.Index to preserve dtypes and handle explicit indexing.""" __slots__ = ("array", "_dtype") @@ -142,7 +142,7 @@ def intersection(self, other): def __getitem__( self, indexer ) -> Union[ - "PandasIndexAdapter", + "PandasIndex", NumpyIndexingAdapter, np.ndarray, np.datetime64, @@ -160,7 +160,7 @@ def __getitem__( result = self.array[key] if isinstance(result, pd.Index): - result = PandasIndexAdapter(result, dtype=self.dtype) + result = PandasIndex(result, dtype=self.dtype) else: # result is a scalar if result is pd.NaT: @@ -192,7 +192,7 @@ def __repr__(self) -> str: type(self).__name__, self.array, self.dtype ) - def copy(self, deep: bool = True) -> "PandasIndexAdapter": + def copy(self, deep: bool = True) -> "PandasIndex": # Not the same as just writing `self.array.copy(deep=deep)`, as # shallow 
copies of the underlying numpy.ndarrays become deep ones # upon pickling @@ -201,7 +201,7 @@ def copy(self, deep: bool = True) -> "PandasIndexAdapter": # >>> len(pickle.dumps((self.array, self.array.copy(deep=False)))) # 8000341 array = self.array.copy(deep=True) if deep else self.array - return PandasIndexAdapter(array, self._dtype) + return PandasIndex(array, self._dtype) def remove_unused_levels_categories(index: pd.Index) -> pd.Index: @@ -312,13 +312,11 @@ def isel_variable_and_index( if isinstance(indexer, Variable): indexer = indexer.data pd_index = index.to_pandas_index() - new_index = PandasIndexAdapter(pd_index[indexer]) + new_index = PandasIndex(pd_index[indexer]) return new_variable, new_index -def roll_index( - index: PandasIndexAdapter, count: int, axis: int = 0 -) -> PandasIndexAdapter: +def roll_index(index: PandasIndex, count: int, axis: int = 0) -> PandasIndex: """Roll an pandas.Index.""" pd_index = index.to_pandas_index() count %= pd_index.shape[0] @@ -326,7 +324,7 @@ def roll_index( new_idx = pd_index[-count:].append(pd_index[:-count]) else: new_idx = pd_index[:] - return PandasIndexAdapter(new_idx) + return PandasIndex(new_idx) def propagate_indexes( diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b463d9ed583..6f53927a2cc 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -108,7 +108,7 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No dimension. If `index` is a pandas.MultiIndex and depending on `label`, return a new pandas.Index or pandas.MultiIndex (otherwise return None). 
""" - from .indexes import PandasIndexAdapter + from .indexes import PandasIndex new_index = None @@ -201,7 +201,7 @@ def convert_label_indexer(index, label, index_name="", method=None, tolerance=No raise KeyError(f"not all values found in index {index_name!r}") if new_index is not None: - new_index = PandasIndexAdapter(new_index) + new_index = PandasIndex(new_index) return indexer, new_index @@ -721,9 +721,9 @@ def as_indexable(array): if isinstance(array, np.ndarray): return NumpyIndexingAdapter(array) if isinstance(array, pd.Index): - from .indexes import PandasIndexAdapter + from .indexes import PandasIndex - return PandasIndexAdapter(array) + return PandasIndex(array) if isinstance(array, dask_array_type): return DaskIndexingAdapter(array) if hasattr(array, "__array_function__"): diff --git a/xarray/core/merge.py b/xarray/core/merge.py index f8bcc2baf53..ade279f2c8d 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -20,7 +20,7 @@ from . import dtypes, pdcompat from .alignment import deep_align from .duck_array_ops import lazy_array_equiv -from .indexes import Index, PandasIndexAdapter +from .indexes import Index, PandasIndex from .utils import Frozen, compat_dict_union, dict_equiv, equivalent from .variable import Variable, as_variable, assert_unique_multiindex_level_names @@ -960,9 +960,7 @@ def dataset_update_method( # use ds.coords and not ds.indexes, else str coords are cast to object # TODO: benbovy - flexible indexes: fix this (it only works with pandas indexes) - indexes = { - key: PandasIndexAdapter(dataset.coords[key]) for key in dataset.xindexes.keys() - } + indexes = {key: PandasIndex(dataset.coords[key]) for key in dataset.xindexes.keys()} return merge_core( [dataset, other], priority_arg=1, diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index d1872ea0ff5..4e3e55cd0b7 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -27,7 +27,7 @@ import numpy as np -from xarray.core.indexes import 
PandasIndexAdapter +from xarray.core.indexes import PandasIndex from .alignment import align from .dataarray import DataArray @@ -511,7 +511,7 @@ def subset_dataset_to_block( idx = indexes[dim].to_pandas_index()[ _get_chunk_slicer(dim, chunk_index, output_chunk_bounds) ] - expected_indexes[dim] = PandasIndexAdapter(idx) + expected_indexes[dim] = PandasIndex(idx) expected["indexes"] = expected_indexes from_wrapper = (gname,) + chunk_tuple diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f972b99f95b..5a62f1b8281 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -26,7 +26,7 @@ from . import common, dtypes, duck_array_ops, indexing, nputils, ops, utils from .arithmetic import VariableArithmetic from .common import AbstractArray -from .indexes import PandasIndexAdapter +from .indexes import PandasIndex from .indexing import BasicIndexer, OuterIndexer, VectorizedIndexer, as_indexable from .options import _get_keep_attrs from .pycompat import ( @@ -175,11 +175,11 @@ def _maybe_wrap_data(data): Put pandas.Index and numpy.ndarray arguments in adapter objects to ensure they can be indexed properly. - NumpyArrayAdapter, PandasIndexAdapter and LazilyIndexedArray should + NumpyArrayAdapter, PandasIndex and LazilyIndexedArray should all pass through unmodified. 
""" if isinstance(data, pd.Index): - return PandasIndexAdapter(data) + return PandasIndex(data) return data @@ -346,7 +346,7 @@ def nbytes(self): @property def _in_memory(self): - return isinstance(self._data, (np.ndarray, np.number, PandasIndexAdapter)) or ( + return isinstance(self._data, (np.ndarray, np.number, PandasIndex)) or ( isinstance(self._data, indexing.MemoryCachedArray) and isinstance(self._data.array, indexing.NumpyIndexingAdapter) ) @@ -554,7 +554,7 @@ def to_index_variable(self): def _to_xindex(self): # temporary function used internally as a replacement of to_index() # returns an xarray Index instance instead of a pd.Index instance - return PandasIndexAdapter(self.to_index()) + return PandasIndex(self.to_index()) def to_index(self): """Convert this variable to a pandas.Index""" @@ -2529,8 +2529,8 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): raise ValueError("%s objects must be 1-dimensional" % type(self).__name__) # Unlike in Variable, always eagerly load values into memory - if not isinstance(self._data, PandasIndexAdapter): - self._data = PandasIndexAdapter(self._data) + if not isinstance(self._data, PandasIndex): + self._data = PandasIndex(self._data) def __dask_tokenize__(self): from dask.base import normalize_token @@ -2838,7 +2838,7 @@ def assert_unique_multiindex_level_names(variables): level_names = defaultdict(list) all_level_names = set() for var_name, var in variables.items(): - if isinstance(var._data, PandasIndexAdapter): + if isinstance(var._data, PandasIndex): idx_level_names = var.to_index_variable().level_names if idx_level_names is not None: for n in idx_level_names: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index aba2d9beeb5..c65ce533039 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -735,7 +735,7 @@ def find_and_validate_array(obj): elif isinstance(obj.array, dask_array_type): assert isinstance(obj, indexing.DaskIndexingAdapter) 
elif isinstance(obj.array, pd.Index): - assert isinstance(obj, indexes.PandasIndexAdapter) + assert isinstance(obj, indexes.PandasIndex) else: raise TypeError( "{} is wrapped by {}".format(type(obj.array), type(obj)) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 66e9d93aba2..f9d3d23f263 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -24,7 +24,7 @@ from xarray.convert import from_cdms2 from xarray.core import dtypes from xarray.core.common import full_like -from xarray.core.indexes import Index, PandasIndexAdapter, propagate_indexes +from xarray.core.indexes import Index, PandasIndex, propagate_indexes from xarray.core.utils import is_scalar from xarray.tests import ( LooseVersion, @@ -148,9 +148,7 @@ def test_data_property(self): def test_indexes(self): array = DataArray(np.zeros((2, 3)), [("x", [0, 1]), ("y", ["a", "b", "c"])]) expected_indexes = {"x": pd.Index([0, 1]), "y": pd.Index(["a", "b", "c"])} - expected_xindexes = { - k: PandasIndexAdapter(idx) for k, idx in expected_indexes.items() - } + expected_xindexes = {k: PandasIndex(idx) for k, idx in expected_indexes.items()} assert array.xindexes.keys() == expected_xindexes.keys() assert array.indexes.keys() == expected_indexes.keys() assert all([isinstance(idx, pd.Index) for idx in array.indexes.values()]) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index df324366f7b..1e0dff45dd2 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -11,7 +11,7 @@ from xarray import Coordinate, DataArray, Dataset, IndexVariable, Variable, set_options from xarray.core import dtypes, duck_array_ops, indexing from xarray.core.common import full_like, ones_like, zeros_like -from xarray.core.indexes import PandasIndexAdapter +from xarray.core.indexes import PandasIndex from xarray.core.indexing import ( BasicIndexer, CopyOnWriteArray, @@ -535,7 +535,7 @@ def test_copy_index(self): v = self.cls("x", 
midx) for deep in [True, False]: w = v.copy(deep=deep) - assert isinstance(w._data, PandasIndexAdapter) + assert isinstance(w._data, PandasIndex) assert isinstance(w.to_index(), pd.MultiIndex) assert_array_equal(v._data.array, w._data.array) @@ -2145,7 +2145,7 @@ def test_multiindex_default_level_names(self): def test_data(self): x = IndexVariable("x", np.arange(3.0)) - assert isinstance(x._data, PandasIndexAdapter) + assert isinstance(x._data, PandasIndex) assert isinstance(x.data, np.ndarray) assert float == x.dtype assert_array_equal(np.arange(3), x) @@ -2287,7 +2287,7 @@ def test_coarsen_2d(self): class TestAsCompatibleData: def test_unchanged_types(self): - types = (np.asarray, PandasIndexAdapter, LazilyIndexedArray) + types = (np.asarray, PandasIndex, LazilyIndexedArray) for t in types: for data in [ np.arange(3), From 44230ccba72004b50c1979b42cbbd000e7df6614 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 May 2021 17:36:31 +0200 Subject: [PATCH 11/20] update index type in tests --- xarray/tests/test_dataarray.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index f9d3d23f263..ee2df765d4b 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1643,7 +1643,8 @@ def test_swap_dims(self): assert_identical(expected, actual) for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.xindexes[dim_name], actual.xindexes[dim_name] + expected.xindexes[dim_name].to_pandas_index(), + actual.xindexes[dim_name].to_pandas_index(), ) # as kwargs @@ -1653,7 +1654,8 @@ def test_swap_dims(self): assert_identical(expected, actual) for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.xindexes[dim_name], actual.xindexes[dim_name] + expected.xindexes[dim_name].to_pandas_index(), + actual.xindexes[dim_name].to_pandas_index(), 
) # multiindex case @@ -1664,7 +1666,8 @@ def test_swap_dims(self): assert_identical(expected, actual) for dim_name in set().union(expected.xindexes.keys(), actual.xindexes.keys()): pd.testing.assert_index_equal( - expected.xindexes[dim_name].array, actual.xindexes[dim_name].array + expected.xindexes[dim_name].to_pandas_index(), + actual.xindexes[dim_name].to_pandas_index(), ) def test_expand_dims_error(self): From 6f2cd9132a91ab942daaf7cb5e52912436f47552 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 May 2021 17:49:03 +0200 Subject: [PATCH 12/20] ensure .indexes only returns pd.Index objects --- xarray/core/dataarray.py | 19 +++++++++++-------- xarray/core/dataset.py | 19 +++++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1cf38960700..79ef27ef570 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -774,14 +774,17 @@ def encoding(self, value: Mapping[Hashable, Any]) -> None: @property def indexes(self) -> Indexes: - """Mapping of xarray Index or pandas.Index objects used for label based indexing.""" - xr_or_pd_indexes = {} - for k, idx in self.xindexes.items(): - try: - xr_or_pd_indexes[k] = idx.to_pandas_index() - except TypeError: - xr_or_pd_indexes[k] = idx - return Indexes(xr_or_pd_indexes) + """Mapping of pandas.Index objects used for label based indexing. + + Raises an error in case where this Dataset has indexes that cannot be coerced + to pandas.Index objects. 
+ + See Also + -------- + DataArray.xindexes + + """ + return Indexes({k: idx.to_pandas_index() for k, idx in self.xindexes.items()}) @property def xindexes(self) -> Indexes: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2da01c68e22..78be3a6f362 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1579,14 +1579,17 @@ def identical(self, other: "Dataset") -> bool: @property def indexes(self) -> Indexes: - """Mapping of xarray Index or pandas.Index objects used for label based indexing.""" - xr_or_pd_indexes = {} - for k, idx in self.xindexes.items(): - try: - xr_or_pd_indexes[k] = idx.to_pandas_index() - except TypeError: - xr_or_pd_indexes[k] = idx - return Indexes(xr_or_pd_indexes) + """Mapping of pandas.Index objects used for label based indexing. + + Raises an error in case where this Dataset has indexes that cannot be coerced + to pandas.Index objects. + + See Also + -------- + Dataset.xindexes + + """ + return Indexes({k: idx.to_pandas_index() for k, idx in self.xindexes.items()}) @property def xindexes(self) -> Indexes: From c3a2d60b95ebb4b87889d66d7da46c68e59da4dc Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 May 2021 17:58:30 +0200 Subject: [PATCH 13/20] PandasIndex: normalize other index in cmp funcs --- xarray/core/indexes.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 9f15f7d7f00..c293117d7d1 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -121,23 +121,18 @@ def shape(self) -> Tuple[int]: def equals(self, other): if isinstance(other, pd.Index): - return self.array.equals(other) - else: - return self.array.equals(other.array) + other = PandasIndex(other) + return isinstance(other, PandasIndex) and self.array.equals(other.array) def union(self, other): if isinstance(other, pd.Index): - new_idx = self.array.union(other) - else: - new_idx = self.array.union(other.array) - return type(self)(new_idx) 
+ other = PandasIndex(other) + return isinstance(other, PandasIndex) and self.array.union(other.array) def intersection(self, other): if isinstance(other, pd.Index): - new_idx = self.array.intersection(other) - else: - new_idx = self.array.intersection(other.array) - return type(self)(new_idx) + other = PandasIndex(other) + return isinstance(other, PandasIndex) and self.array.intersection(other.array) def __getitem__( self, indexer From f8b8ff4c03a812263bf5510cc4745febc10010c4 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 May 2021 18:35:34 +0200 Subject: [PATCH 14/20] fix merge lint errors --- xarray/core/indexing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 88dfaec91e1..d72d000385e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -2,8 +2,6 @@ import functools import operator from collections import defaultdict -from contextlib import suppress -from datetime import timedelta from distutils.version import LooseVersion from typing import Any, Callable, Iterable, List, Sequence, Tuple, Union From be569fce5893f9fbc7056e1cf0ab70fca9479487 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 3 May 2021 18:35:58 +0200 Subject: [PATCH 15/20] fix PandasIndex union/intersection --- xarray/core/indexes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index c293117d7d1..3bf4f9d4a11 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -127,12 +127,12 @@ def equals(self, other): def union(self, other): if isinstance(other, pd.Index): other = PandasIndex(other) - return isinstance(other, PandasIndex) and self.array.union(other.array) + return PandasIndex(self.array.union(other.array)) def intersection(self, other): if isinstance(other, pd.Index): other = PandasIndex(other) - return isinstance(other, PandasIndex) and self.array.intersection(other.array) + return 
PandasIndex(self.array.intersection(other.array)) def __getitem__( self, indexer From e25348e33444619353a2ac576bc2e47542533f59 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 4 May 2021 10:57:54 +0200 Subject: [PATCH 16/20] [skip-ci] add TODO comment about index sizes --- xarray/core/alignment.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index a373fdf3080..f6e026c0109 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -332,6 +332,9 @@ def align( if dim in unlabeled_dim_sizes: unlabeled_sizes = unlabeled_dim_sizes[dim] + # TODO: benbovy - flexible indexes: expose a size property for xarray.Index? + # Some indexes may not have a defined size (e.g., built from multiple coords of + # different sizes) labeled_size = index.size if len(unlabeled_sizes | {labeled_size}) > 1: raise ValueError( From b8f5de86d3cc0f1a0cc03ba9c95a890ee0bee775 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Tue, 4 May 2021 22:12:29 +0200 Subject: [PATCH 17/20] address more PR comments --- xarray/core/dataarray.py | 2 +- xarray/core/dataset.py | 2 +- xarray/core/indexes.py | 6 ++---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 46aab1c3be2..21daed1cec1 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -776,7 +776,7 @@ def encoding(self, value: Mapping[Hashable, Any]) -> None: def indexes(self) -> Indexes: """Mapping of pandas.Index objects used for label based indexing. - Raises an error in case where this Dataset has indexes that cannot be coerced + Raises an error if this Dataset has indexes that cannot be coerced to pandas.Index objects. 
See Also diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 17f3c47f41a..97da9ac267e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1611,7 +1611,7 @@ def identical(self, other: "Dataset") -> bool: def indexes(self) -> Indexes: """Mapping of pandas.Index objects used for label based indexing. - Raises an error in case where this Dataset has indexes that cannot be coerced + Raises an error if this Dataset has indexes that cannot be coerced to pandas.Index objects. See Also diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 3bf4f9d4a11..fcc0e7057fb 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -28,7 +28,7 @@ class Index: """Base class inherited by all xarray-compatible indexes.""" - __slots__ = "coord_names" + __slots__ = ("coord_names",) def __init__(self, coord_names: Union[Hashable, Iterable[Hashable]]): if isinstance(coord_names, Iterable) and not isinstance(coord_names, str): @@ -183,9 +183,7 @@ def transpose(self, order) -> pd.Index: return self.array # self.array should be always one-dimensional def __repr__(self) -> str: - return "{}(array={!r}, dtype={!r})".format( - type(self).__name__, self.array, self.dtype - ) + return f"{type(self).__name__}(array={self.array!r}, dtype={self.dtype!r})" def copy(self, deep: bool = True) -> "PandasIndex": # Not the same as just writing `self.array.copy(deep=deep)`, as From 5ee8307785b2d09615a3b698ef63cbb2e9855a0a Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 5 May 2021 15:59:20 +0200 Subject: [PATCH 18/20] [skip-ci] update what's new --- doc/whats-new.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0081d18efb3..5fc517aaabc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -171,7 +171,10 @@ Internal Changes ``pytest.raises(Exception, match="foo")``; (:pull:`5188`), (:pull:`5191`). By `Maximilian Roos `_. 
- +- Explicit indexes refactor: add an ``xarray.Index`` base class and + ``Dataset.xindexes`` / ``DataArray.xindexes`` properties. Also rename + ``PandasIndexAdapter`` to ``PandasIndex``, which now inherits from + ``xarray.Index`` (:pull:`5102`). By `Benoit Bovy `_. .. _whats-new.0.17.0: From ec0a2d6cd7e5e1aa5f9db8809d98e8ff9c83f8cd Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 10 May 2021 17:12:03 +0200 Subject: [PATCH 19/20] fix coord_names normalization --- xarray/core/indexes.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index fcc0e7057fb..be362e1c942 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -31,10 +31,9 @@ class Index: __slots__ = ("coord_names",) def __init__(self, coord_names: Union[Hashable, Iterable[Hashable]]): - if isinstance(coord_names, Iterable) and not isinstance(coord_names, str): - self.coord_names = tuple(coord_names) - else: - self.coord_names = tuple([coord_names]) + if isinstance(coord_names, Hashable): + coord_names = (coord_names,) + self.coord_names = tuple(coord_names) @classmethod def from_variables( From ce59dece723ca49eaae69779dee5da2aa30d0286 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Mon, 10 May 2021 17:15:40 +0200 Subject: [PATCH 20/20] move what's new entry to unreleased section --- doc/whats-new.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 8f9b5bcceb4..3f81678b8d5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -42,6 +42,10 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Explicit indexes refactor: add an ``xarray.Index`` base class and + ``Dataset.xindexes`` / ``DataArray.xindexes`` properties. Also rename + ``PandasIndexAdapter`` to ``PandasIndex``, which now inherits from + ``xarray.Index`` (:pull:`5102`). By `Benoit Bovy `_. .. 
_whats-new.0.18.0: @@ -267,10 +271,6 @@ Internal Changes ``pytest.raises(Exception, match="foo")``; (:pull:`5188`), (:pull:`5191`). By `Maximilian Roos `_. -- Explicit indexes refactor: add an ``xarray.Index`` base class and - ``Dataset.xindexes`` / ``DataArray.xindexes`` properties. Also rename - ``PandasIndexAdapter`` to ``PandasIndex``, which now inherits from - ``xarray.Index`` (:pull:`5102`). By `Benoit Bovy `_. .. _whats-new.0.17.0: