From 4465699c78dfe2d0695287955d620401fdf3d4fd Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 22 Mar 2024 21:28:58 -0700 Subject: [PATCH 01/21] pass key tuple to indexing adapters and explicitly indexed arrays --- xarray/core/indexing.py | 203 ++++++++++++++++------------------ xarray/tests/test_indexing.py | 42 +++---- 2 files changed, 116 insertions(+), 129 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index e26c50c8b90..63eb2860266 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -492,29 +492,29 @@ class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): def get_duck_array(self): key = BasicIndexer((slice(None),) * self.ndim) - return self[key] + return self[key.tuple] def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: # This is necessary because we apply the indexing key in self.get_duck_array() # Note this is the base class for all lazy indexing classes return np.asarray(self.get_duck_array(), dtype=dtype) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: tuple[Any, ...]): raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: tuple[Any, ...]): raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -550,7 +550,7 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: def get_duck_array(self): return self.array.get_duck_array() - def __getitem__(self, key: Any): + def __getitem__(self, key: tuple[Any, ...]): key = expanded_indexer(key, self.ndim) indexer = self.indexer_cls(key) @@ -620,7 +620,7 @@ def get_duck_array(self): else: # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray so use its __getitem__ - array = self.array[self.key] + array = self.array[self.key.tuple] # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass @@ -633,31 +633,29 @@ def get_duck_array(self): def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _oindex_get(self, indexer: tuple[Any, ...]): + return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: tuple[Any, ...]): array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return type(self)(self.array, self._updated_key(indexer)) + def __getitem__(self, indexer: tuple[Any, ...]): + return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) - def _vindex_set(self, key: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, key: tuple[Any, ...], value: Any) -> None: raise NotImplementedError( "Lazy 
item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." ) - def _oindex_set(self, key: OuterIndexer, value: Any) -> None: - full_key = self._updated_key(key) - self.array.oindex[full_key] = value + def _oindex_set(self, key: tuple[Any, ...], value: Any) -> None: + full_key = self._updated_key(OuterIndexer(key)) + self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: BasicIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(key) - full_key = self._updated_key(key) - self.array[full_key] = value + def __setitem__(self, key: tuple[Any, ...], value: Any) -> None: + full_key = self._updated_key(BasicIndexer(key)) + self.array[full_key.tuple] = value def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" @@ -696,7 +694,7 @@ def get_duck_array(self): else: # If the array is not an ExplicitlyIndexedNDArrayMixin, # it may wrap a BackendArray so use its __getitem__ - array = self.array[self.key] + array = self.array[self.key.tuple] # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) @@ -708,25 +706,25 @@ def get_duck_array(self): def _updated_key(self, new_key: ExplicitIndexer): return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _oindex_get(self, indexer: tuple[Any, ...]): + return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: VectorizedIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _vindex_get(self, indexer: tuple[Any, ...]): + return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) + + def __getitem__(self, indexer: tuple[Any, ...]): - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) # If the indexed array becomes a scalar, return LazilyIndexedArray - if all(isinstance(ind, integer_types) for ind in indexer.tuple): - key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple)) + if all(isinstance(ind, integer_types) for ind in indexer): + key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) return LazilyIndexedArray(self.array, key) - return type(self)(self.array, self._updated_key(indexer)) + return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) def transpose(self, order): key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." 
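Taken together, these signature changes mean that indexing adapters and explicitly indexed arrays now receive the raw key tuple instead of an ExplicitIndexer wrapper: callers unwrap `indexer.tuple` once at the boundary, and the adapters rewrap into `BasicIndexer`/`OuterIndexer`/`VectorizedIndexer` internally only where a combined key is still required. A minimal sketch of the new calling convention, using a hypothetical `TupleKeyAdapter` rather than the real xarray adapters:

    # Toy sketch of the tuple-based calling convention; TupleKeyAdapter is hypothetical, not xarray code.
    import numpy as np

    class TupleKeyAdapter:
        def __init__(self, array: np.ndarray) -> None:
            self.array = array

        def __getitem__(self, key: tuple) -> np.ndarray:
            # Basic indexing: the key already arrives as a plain tuple, so no `.tuple` unwrapping is needed here.
            return self.array[key]

    adapter = TupleKeyAdapter(np.arange(12).reshape(3, 4))
    # Callers previously passed wrapper objects such as BasicIndexer((0, slice(None)));
    # with this patch they pass the underlying tuple directly:
    row = adapter[(0, slice(None))]
    assert row.tolist() == [0, 1, 2, 3]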
@@ -759,29 +757,27 @@ def _ensure_copied(self): def get_duck_array(self): return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: tuple[Any, ...]): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: tuple[Any, ...]): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: tuple[Any, ...]): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: self._ensure_copied() self.array[indexer] = value @@ -809,27 +805,25 @@ def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: tuple[Any, ...]): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: tuple[Any, ...]): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: tuple[Any, ...]): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: self.array[indexer] = value @@ -995,21 +989,21 @@ def explicit_indexing_adapter( def apply_indexer(indexable, indexer: ExplicitIndexer): """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[indexer] + return indexable.vindex[indexer.tuple] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[indexer] + return indexable.oindex[indexer.tuple] else: - return indexable[indexer] + return indexable[indexer.tuple] def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: """Set values in an indexable object using an indexer.""" if isinstance(indexer, VectorizedIndexer): - indexable.vindex[indexer] = value + indexable.vindex[indexer.tuple] = value elif isinstance(indexer, OuterIndexer): - indexable.oindex[indexer] = value + indexable.oindex[indexer.tuple] = value else: - indexable[indexer] = value + 
indexable[indexer.tuple] = value def decompose_indexer( @@ -1472,22 +1466,21 @@ def __init__(self, array): def transpose(self, order): return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer): - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_get(self, indexer: tuple[Any, ...]): + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: tuple[Any, ...]): array = NumpyVIndexAdapter(self.array) - return array[indexer.tuple] + return array[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: tuple[Any, ...]): array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = indexer.tuple + (Ellipsis,) + key = indexer + (Ellipsis,) return array[key] def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: @@ -1503,21 +1496,20 @@ def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: array = NumpyVIndexAdapter(self.array) - self._safe_setitem(array, indexer.tuple, value) + self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). 
- key = indexer.tuple + (Ellipsis,) + key = indexer + (Ellipsis,) self._safe_setitem(array, key, value) @@ -1546,30 +1538,28 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: tuple[Any, ...]): # manual orthogonal indexing (implemented like DaskIndexingAdapter) - key = indexer.tuple + value = self.array - for axis, subkey in reversed(list(enumerate(key))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: tuple[Any, ...]): raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: tuple[Any, ...]): + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - self.array[indexer.tuple] = value + def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: + self.array[indexer] = value def transpose(self, order): xp = self.array.__array_namespace__() @@ -1587,38 +1577,35 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer): - key = indexer.tuple + def _oindex_get(self, indexer: tuple[Any, ...]): try: - return self.array[key] + return self.array[indexer] except NotImplementedError: # manual orthogonal indexing value = self.array - for axis, subkey in reversed(list(enumerate(key))): + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: VectorizedIndexer): - return self.array.vindex[indexer.tuple] + def _vindex_get(self, indexer: tuple[Any, ...]): + return self.array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: tuple[Any, ...]): + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer.tuple) + def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( "xarray can't set arrays with multiple " "array indices to dask yet." 
) - self.array[indexer.tuple] = value + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: - self.array.vindex[indexer.tuple] = value + def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + self.array.vindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: + self.array[indexer] = value def transpose(self, order): return self.array.transpose(order) @@ -1703,7 +1690,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: tuple[Any, ...] ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1711,7 +1698,7 @@ def _oindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1722,7 +1709,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: tuple[Any, ...] ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1730,7 +1717,7 @@ def _vindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1741,7 +1728,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: ExplicitIndexer + self, indexer: tuple[Any, ...] ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1749,7 +1736,7 @@ def __getitem__( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1814,7 +1801,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: tuple[Any, ...] ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1828,7 +1815,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: tuple[Any, ...] 
) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1841,7 +1828,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: ExplicitIndexer): + def __getitem__(self, indexer: tuple[Any, ...]): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level @@ -1863,7 +1850,7 @@ def _get_array_subset(self) -> np.ndarray: if self.size > threshold: pos = threshold // 2 indices = np.concatenate([np.arange(0, pos), np.arange(-pos, 0)]) - subset = self[OuterIndexer((indices,))] + subset = self[(indices,)] else: subset = self diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index f019d3c789c..fd1997eb05a 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -433,7 +433,7 @@ def test_lazily_indexed_array_vindex_setitem(self) -> None: NotImplementedError, match=r"Lazy item assignment with the vectorized indexer is not yet", ): - lazy.vindex[indexer] = 0 + lazy.vindex[indexer.tuple] = 0 @pytest.mark.parametrize( "indexer_class, key, value", @@ -449,10 +449,10 @@ def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None: if indexer_class is indexing.BasicIndexer: indexer = indexer_class(key) - lazy[indexer] = value + lazy[indexer.tuple] = value elif indexer_class is indexing.OuterIndexer: indexer = indexer_class(key) - lazy.oindex[indexer] = value + lazy.oindex[indexer.tuple] = value assert_array_equal(original[key], value) @@ -461,16 +461,16 @@ class TestCopyOnWriteArray: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - wrapped[B[:]] = 0 + wrapped[B[:].tuple] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.zeros(10)) def test_sub_array(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - child = wrapped[B[:5]] + child = wrapped[B[:5].tuple] assert isinstance(child, indexing.CopyOnWriteArray) - child[B[:]] = 0 + child[B[:].tuple] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.arange(10)) assert_array_equal(child, np.zeros(5)) @@ -478,7 +478,7 @@ def test_sub_array(self) -> None: def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[B[0].tuple][B[()].tuple]) == "foo" class TestMemoryCachedArray: @@ -491,7 +491,7 @@ def test_wrapper(self) -> None: def test_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) - child = wrapped[B[:5]] + child = wrapped[B[:5].tuple] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) @@ -500,13 +500,13 @@ def test_sub_array(self) -> None: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) - wrapped[B[:]] = 0 + wrapped[B[:].tuple] = 0 assert_array_equal(original, np.zeros(10)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[B[0].tuple][B[()].tuple]) == "foo" def test_base_explicit_indexer() -> None: @@ -615,7 +615,7 @@ def test_arrayize_vectorized_indexer(self) -> None: vindex, self.data.shape ) np.testing.assert_array_equal( - self.data.vindex[vindex], self.data.vindex[vindex_array] + 
self.data.vindex[vindex.tuple], self.data.vindex[vindex_array.tuple] ) actual = indexing._arrayize_vectorized_indexer( @@ -731,35 +731,35 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: # Dispatch to appropriate indexing method if indexer_mode.startswith("vectorized"): - expected = indexing_adapter.vindex[indexer] + expected = indexing_adapter.vindex[indexer.tuple] elif indexer_mode.startswith("outer"): - expected = indexing_adapter.oindex[indexer] + expected = indexing_adapter.oindex[indexer.tuple] else: - expected = indexing_adapter[indexer] # Basic indexing + expected = indexing_adapter[indexer.tuple] # Basic indexing if isinstance(backend_ind, indexing.VectorizedIndexer): - array = indexing_adapter.vindex[backend_ind] + array = indexing_adapter.vindex[backend_ind.tuple] elif isinstance(backend_ind, indexing.OuterIndexer): - array = indexing_adapter.oindex[backend_ind] + array = indexing_adapter.oindex[backend_ind.tuple] else: - array = indexing_adapter[backend_ind] + array = indexing_adapter[backend_ind.tuple] if len(np_ind.tuple) > 0: array_indexing_adapter = indexing.NumpyIndexingAdapter(array) if isinstance(np_ind, indexing.VectorizedIndexer): - array = array_indexing_adapter.vindex[np_ind] + array = array_indexing_adapter.vindex[np_ind.tuple] elif isinstance(np_ind, indexing.OuterIndexer): - array = array_indexing_adapter.oindex[np_ind] + array = array_indexing_adapter.oindex[np_ind.tuple] else: - array = array_indexing_adapter[np_ind] + array = array_indexing_adapter[np_ind.tuple] np.testing.assert_array_equal(expected, array) if not all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) assert isinstance(combined_ind, indexing.VectorizedIndexer) - array = indexing_adapter.vindex[combined_ind] + array = indexing_adapter.vindex[combined_ind.tuple] np.testing.assert_array_equal(expected, array) From 728d57fc2fa2a0c47b38345031615d1ec8de94b8 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 22 Mar 2024 21:50:00 -0700 Subject: [PATCH 02/21] update indexing in StackedBytesArray --- xarray/coding/strings.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index db95286f6aa..194cd4a3d74 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -3,6 +3,7 @@ from __future__ import annotations from functools import partial +from typing import Any import numpy as np @@ -240,7 +241,7 @@ def __init__(self, array): @property def dtype(self): - return np.dtype("S" + str(self.array.shape[-1])) + return np.dtype(f"S{str(self.array.shape[-1])}") @property def shape(self) -> tuple[int, ...]: @@ -249,15 +250,15 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key): + def _vindex_get(self, key: tuple[Any, ...]): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key): + def _oindex_get(self, key: tuple[Any, ...]): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key): + def __getitem__(self, key: tuple[Any, ...]): # require slicing the last dimension completely - key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) - if key.tuple[-1] != slice(None): + indexer = indexing.expanded_indexer(key, self.array.ndim) + if indexer[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + return 
_numpy_char_to_bytes(self.array[indexer]) From b68b32bbe46bba92f77b88c1807a6534e01c3a5e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 26 Mar 2024 14:01:17 -0700 Subject: [PATCH 03/21] Update indexing in StackedBytesArray --- xarray/tests/test_coding_strings.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 51f63ea72dd..dd1fe458df4 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -151,9 +151,9 @@ def test_StackedBytesArray() -> None: assert_array_equal(expected, actual) B = IndexerMaker(indexing.BasicIndexer) - assert_array_equal(expected[:1], actual[B[:1]]) + assert_array_equal(expected[:1], actual[B[:1].tuple]) with pytest.raises(IndexError): - actual[B[:, :2]] + actual[B[:, :2].tuple] def test_StackedBytesArray_scalar() -> None: @@ -171,7 +171,7 @@ def test_StackedBytesArray_scalar() -> None: B = IndexerMaker(indexing.BasicIndexer) with pytest.raises(IndexError): - actual[B[:2]] + actual[B[:2].tuple] def test_StackedBytesArray_vectorized_indexing() -> None: @@ -181,7 +181,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: V = IndexerMaker(indexing.VectorizedIndexer) indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked.vindex[indexer] + actual = stacked.vindex[indexer.tuple] assert_array_equal(actual, expected) From 1b8f810582346a0c5f8975270b4bf3e5119d736c Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 27 Mar 2024 16:16:16 -0700 Subject: [PATCH 04/21] Add _IndexerKey type to _typing.py --- xarray/core/indexing.py | 112 +++++++++++++++++------------------ xarray/namedarray/_typing.py | 1 + 2 files changed, 57 insertions(+), 56 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 63eb2860266..b2487921c89 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -35,7 +35,7 @@ from xarray.core.indexes import Index from xarray.core.variable import Variable - from xarray.namedarray._typing import _Shape, duckarray + from xarray.namedarray._typing import _IndexerKey, _Shape, duckarray from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -303,13 +303,13 @@ class ExplicitIndexer: __slots__ = ("_key",) - def __init__(self, key: tuple[Any, ...]): + def __init__(self, key: _IndexerKey): if type(self) is ExplicitIndexer: raise TypeError("cannot instantiate base ExplicitIndexer objects") self._key = tuple(key) @property - def tuple(self) -> tuple[Any, ...]: + def tuple(self) -> _IndexerKey: return self._key def __repr__(self) -> str: @@ -499,22 +499,22 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: # Note this is the base class for all lazy indexing classes return np.asarray(self.get_duck_array(), dtype=dtype) - def _oindex_get(self, indexer: tuple[Any, ...]): + def _oindex_get(self, indexer: _IndexerKey): raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: tuple[Any, ...]): + def _vindex_get(self, indexer: _IndexerKey): raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _vindex_set(self, 
indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -550,7 +550,7 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: def get_duck_array(self): return self.array.get_duck_array() - def __getitem__(self, key: tuple[Any, ...]): + def __getitem__(self, key: _IndexerKey): key = expanded_indexer(key, self.ndim) indexer = self.indexer_cls(key) @@ -633,27 +633,27 @@ def get_duck_array(self): def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: tuple[Any, ...]): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: tuple[Any, ...]): + def _vindex_get(self, indexer: _IndexerKey): array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: tuple[Any, ...]): + def __getitem__(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) - def _vindex_set(self, key: tuple[Any, ...], value: Any) -> None: + def _vindex_set(self, key: _IndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." ) - def _oindex_set(self, key: tuple[Any, ...], value: Any) -> None: + def _oindex_set(self, key: _IndexerKey, value: Any) -> None: full_key = self._updated_key(OuterIndexer(key)) self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: tuple[Any, ...], value: Any) -> None: + def __setitem__(self, key: _IndexerKey, value: Any) -> None: full_key = self._updated_key(BasicIndexer(key)) self.array[full_key.tuple] = value @@ -706,13 +706,13 @@ def get_duck_array(self): def _updated_key(self, new_key: ExplicitIndexer): return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: tuple[Any, ...]): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: tuple[Any, ...]): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) - def __getitem__(self, indexer: tuple[Any, ...]): + def __getitem__(self, indexer: _IndexerKey): # If the indexed array becomes a scalar, return LazilyIndexedArray if all(isinstance(ind, integer_types) for ind in indexer): @@ -724,7 +724,7 @@ def transpose(self, order): key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." 
@@ -757,27 +757,27 @@ def _ensure_copied(self): def get_duck_array(self): return self.array.get_duck_array() - def _oindex_get(self, indexer: tuple[Any, ...]): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: tuple[Any, ...]): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: tuple[Any, ...]): + def __getitem__(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self._ensure_copied() self.array[indexer] = value @@ -805,25 +805,25 @@ def get_duck_array(self): self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: tuple[Any, ...]): + def _oindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: tuple[Any, ...]): + def _vindex_get(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: tuple[Any, ...]): + def __getitem__(self, indexer: _IndexerKey): return type(self)(_wrap_numpy_scalars(self.array[indexer])) def transpose(self, order): return self.array.transpose(order) - def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value @@ -1466,15 +1466,15 @@ def __init__(self, array): def transpose(self, order): return self.array.transpose(order) - def _oindex_get(self, indexer: tuple[Any, ...]): + def _oindex_get(self, indexer: _IndexerKey): key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: tuple[Any, ...]): + def _vindex_get(self, indexer: _IndexerKey): array = NumpyVIndexAdapter(self.array) return array[indexer] - def __getitem__(self, indexer: tuple[Any, ...]): + def __getitem__(self, indexer: _IndexerKey): array = self.array # We want 0d slices rather than scalars. 
This is achieved by @@ -1483,7 +1483,7 @@ def __getitem__(self, indexer: tuple[Any, ...]): key = indexer + (Ellipsis,) return array[key] - def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: + def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: try: array[key] = value except ValueError as exc: @@ -1496,15 +1496,15 @@ def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see @@ -1538,7 +1538,7 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: tuple[Any, ...]): + def _oindex_get(self, indexer: _IndexerKey): # manual orthogonal indexing (implemented like DaskIndexingAdapter) value = self.array @@ -1546,19 +1546,19 @@ def _oindex_get(self, indexer: tuple[Any, ...]): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: tuple[Any, ...]): + def _vindex_get(self, indexer: _IndexerKey): raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: tuple[Any, ...]): + def __getitem__(self, indexer: _IndexerKey): return self.array[indexer] - def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value - def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value def transpose(self, order): @@ -1577,7 +1577,7 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: tuple[Any, ...]): + def _oindex_get(self, indexer: _IndexerKey): try: return self.array[indexer] except NotImplementedError: @@ -1587,13 +1587,13 @@ def _oindex_get(self, indexer: tuple[Any, ...]): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: tuple[Any, ...]): + def _vindex_get(self, indexer: _IndexerKey): return self.array.vindex[indexer] - def __getitem__(self, indexer: tuple[Any, ...]): + def __getitem__(self, indexer: _IndexerKey): return self.array[indexer] - def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _oindex_set(self, indexer: _IndexerKey, value: Any) -> None: num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( @@ -1601,10 +1601,10 @@ def _oindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: ) self.array[indexer] = value - def _vindex_set(self, indexer: tuple[Any, ...], value: Any) -> None: + def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: 
self.array.vindex[indexer] = value - def __setitem__(self, indexer: tuple[Any, ...], value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: self.array[indexer] = value def transpose(self, order): @@ -1667,7 +1667,7 @@ def _convert_scalar(self, item): # a NumPy array. return to_0d_array(item) - def _prepare_key(self, key: tuple[Any, ...]) -> tuple[Any, ...]: + def _prepare_key(self, key: _IndexerKey) -> _IndexerKey: if isinstance(key, tuple) and len(key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) @@ -1690,7 +1690,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: tuple[Any, ...] + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1709,7 +1709,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: tuple[Any, ...] + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1728,7 +1728,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: tuple[Any, ...] + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1801,7 +1801,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: tuple[Any, ...] + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1815,7 +1815,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: tuple[Any, ...] + self, indexer: _IndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1828,7 +1828,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: tuple[Any, ...]): + def __getitem__(self, indexer: _IndexerKey): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index b715973814f..243c2382472 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,6 +95,7 @@ def dtype(self) -> _DType_co: ... _IndexKey = Union[int, slice, "ellipsis"] _IndexKeys = tuple[Union[_IndexKey], ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] +_IndexerKey = tuple[Any, ...] 
_AttrsLike = Union[Mapping[Any, Any], None] From 52c0f4c64276faabb10bce7c96b2c81ab2375254 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 00:12:27 -0700 Subject: [PATCH 05/21] Update indexing in StackedBytesArray --- xarray/coding/strings.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 194cd4a3d74..266246a4288 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -3,7 +3,6 @@ from __future__ import annotations from functools import partial -from typing import Any import numpy as np @@ -18,6 +17,7 @@ from xarray.core import indexing from xarray.core.utils import module_available from xarray.core.variable import Variable +from xarray.namedarray._typing import _IndexerKey from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array @@ -250,13 +250,13 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key: tuple[Any, ...]): + def _vindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key: tuple[Any, ...]): + def _oindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key: tuple[Any, ...]): + def __getitem__(self, key: _IndexerKey): # require slicing the last dimension completely indexer = indexing.expanded_indexer(key, self.array.ndim) if indexer[-1] != slice(None): From 3c76fc5e6cfdbb43aaaeb946dee36c8e1ef1ea8e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 00:16:36 -0700 Subject: [PATCH 06/21] use tuple indexing in test_backend_array_deprecation_warning --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 64184c523f8..bd78d19799b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5811,7 +5811,7 @@ def _getitem(self, key): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - la.vindex[indexer].get_duck_array() + la.vindex[indexer.tuple].get_duck_array() captured = capsys.readouterr() assert len(w) == 1 From f062d0d2f3c012ef4d8da309f01d9ba322e5f18d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 04:18:15 -0700 Subject: [PATCH 07/21] Add support for CompatIndexedTuple in explicit indexing adapter This commit updates the `explicit_indexing_adapter` function to accept both `ExplicitIndexer` and the new `CompatIndexedTuple`. The `CompatIndexedTuple` is designed to facilitate the transition towards using raw tuples by carrying additional metadata about the indexing type (basic, vectorized, or outer). 
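A minimal sketch of the idea (standard library only) that mirrors the class added below; the `Literal` annotation is included here for clarity and is not part of the patch:

    # Sketch only; the real CompatIndexedTuple is defined in xarray/core/indexing.py in this patch.
    from typing import Literal

    class CompatIndexedTuple(tuple):
        """A key tuple that also remembers which kind of indexing produced it."""

        indexer_type: Literal["basic", "vectorized", "outer"]

        def __new__(cls, iterable, indexer_type: Literal["basic", "vectorized", "outer"]):
            obj = super().__new__(cls, iterable)
            obj.indexer_type = indexer_type
            return obj

    key = CompatIndexedTuple((slice(None), 0), indexer_type="outer")
    assert isinstance(key, tuple) and key[1] == 0  # usable anywhere a raw key tuple is expected
    assert key.indexer_type == "outer"  # backends that still need the indexing kind can branch on it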
--- xarray/coding/strings.py | 4 ++- xarray/core/indexing.py | 73 ++++++++++++++++++++++++++++++++++------ 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 266246a4288..cafaaa19f13 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -257,8 +257,10 @@ def _oindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) def __getitem__(self, key: _IndexerKey): + from xarray.core.indexing import CompatIndexedTuple + # require slicing the last dimension completely indexer = indexing.expanded_indexer(key, self.array.ndim) if indexer[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[indexer]) + return _numpy_char_to_bytes(self.array[CompatIndexedTuple(indexer, "basic")]) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 22c7cc4bf77..d447f0b6b6b 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -10,7 +10,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import TYPE_CHECKING, Any, Callable, overload +from typing import TYPE_CHECKING, Any, Callable, Literal, overload import numpy as np import pandas as pd @@ -660,7 +660,7 @@ def get_duck_array(self): category=DeprecationWarning, stacklevel=2, ) - array = self.array[self.key.tuple] + array = self.array[self.key] # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass @@ -681,7 +681,10 @@ def _vindex_get(self, indexer: _IndexerKey): return array.vindex[indexer] def __getitem__(self, indexer: _IndexerKey): - return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) + key = BasicIndexer( + indexer.tuple if isinstance(indexer, ExplicitIndexer) else indexer + ) + return type(self)(self.array, self._updated_key(key)) def _vindex_set(self, key: _IndexerKey, value: Any) -> None: raise NotImplementedError( @@ -741,7 +744,7 @@ def get_duck_array(self): category=PendingDeprecationWarning, stacklevel=2, ) - array = self.array[self.key.tuple] + array = self.array[self.key] # self.array[self.key] is now a numpy array when # self.array is a BackendArray subclass @@ -1034,14 +1037,50 @@ def explicit_indexing_adapter( return result +class CompatIndexedTuple(tuple): + """ + A tuple subclass used to transition existing backend implementations towards the use of raw tuples + for indexing by carrying additional metadata about the type of indexing being + performed ('basic', 'vectorized', or 'outer'). This class serves as a bridge, allowing + backend arrays that currently expect this metadata to function correctly while + maintaining the outward behavior of a regular tuple. + + This class is particularly useful during the phase where the backend implementations are + not yet capable of directly accepting raw tuples without additional context about + the indexing type. It ensures that these backends can still correctly interpret and + process indexing operations by providing them with the necessary contextual information. 
+ """ + + def __new__(cls, iterable, indexer_type: Literal["basic", "vectorized", "outer"]): + obj = super().__new__(cls, iterable) + obj.indexer_type = indexer_type + return obj + + def __repr__(self): + return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" + + +# def _create_compat_indexed_tuple(indexer): +# if isinstance(indexer, BasicIndexer): +# return CompatIndexedTuple(indexer.tuple, "basic") +# elif isinstance(indexer, VectorizedIndexer): +# return CompatIndexedTuple(indexer.tuple, "vectorized") +# elif isinstance(indexer, OuterIndexer): +# return CompatIndexedTuple(indexer.tuple, "outer") +# else: +# raise TypeError( +# f"indexer must be a BasicIndexer, VectorizedIndexer, or OuterIndexer, not {type(indexer)}" +# ) + + def apply_indexer(indexable, indexer: ExplicitIndexer): """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[indexer.tuple] + return indexable.vindex[CompatIndexedTuple(indexer.tuple, "vectorized")] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[indexer.tuple] + return indexable.oindex[CompatIndexedTuple(indexer.tuple, "outer")] else: - return indexable[indexer.tuple] + return indexable[CompatIndexedTuple(indexer.tuple, "basic")] def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: @@ -1055,8 +1094,19 @@ def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: def decompose_indexer( - indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport + indexer: ExplicitIndexer | CompatIndexedTuple, + shape: _Shape, + indexing_support: IndexingSupport, ) -> tuple[ExplicitIndexer, ExplicitIndexer]: + if isinstance(indexer, CompatIndexedTuple): + # recreate the indexer object from the tuple and the type of indexing. + # This is necessary to ensure that the backend array can correctly interpret the indexing operation. 
+ if indexer.indexer_type == "vectorized": + indexer = VectorizedIndexer(indexer) + elif indexer.indexer_type == "outer": + indexer = OuterIndexer(indexer) + else: + indexer = BasicIndexer(indexer) if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, (BasicIndexer, OuterIndexer)): @@ -1716,12 +1766,13 @@ def _convert_scalar(self, item): return to_0d_array(item) def _prepare_key(self, key: _IndexerKey) -> _IndexerKey: - if isinstance(key, tuple) and len(key) == 1: + _key = key.tuple if isinstance(key, ExplicitIndexer) else key + if isinstance(_key, tuple) and len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - (key,) = key + (_key,) = _key - return key + return _key def _handle_result( self, result: Any From fb17dc2f063602db1b34f701d72666d9f8d19618 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 04:19:53 -0700 Subject: [PATCH 08/21] remove unused code --- xarray/core/indexing.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index d447f0b6b6b..973b71abe5e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1060,19 +1060,6 @@ def __repr__(self): return f"CompatIndexedTuple({super().__repr__()}, indexer_type='{self.indexer_type}')" -# def _create_compat_indexed_tuple(indexer): -# if isinstance(indexer, BasicIndexer): -# return CompatIndexedTuple(indexer.tuple, "basic") -# elif isinstance(indexer, VectorizedIndexer): -# return CompatIndexedTuple(indexer.tuple, "vectorized") -# elif isinstance(indexer, OuterIndexer): -# return CompatIndexedTuple(indexer.tuple, "outer") -# else: -# raise TypeError( -# f"indexer must be a BasicIndexer, VectorizedIndexer, or OuterIndexer, not {type(indexer)}" -# ) - - def apply_indexer(indexable, indexer: ExplicitIndexer): """Apply an indexer to an indexable object.""" if isinstance(indexer, VectorizedIndexer): From 5fa48ebf9323475b0f90232103deb15723f918b8 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 04:36:24 -0700 Subject: [PATCH 09/21] type hint fixes --- xarray/core/indexing.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 973b71abe5e..b2397b80848 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -575,9 +575,9 @@ def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: def get_duck_array(self): return self.array.get_duck_array() - def __getitem__(self, key: _IndexerKey): - key = expanded_indexer(key, self.ndim) - indexer = self.indexer_cls(key) + def __getitem__(self, key: _IndexerKey | slice): + _key = expanded_indexer(key, self.ndim) + indexer = self.indexer_cls(_key) result = apply_indexer(self.array, indexer) @@ -1053,7 +1053,7 @@ class CompatIndexedTuple(tuple): def __new__(cls, iterable, indexer_type: Literal["basic", "vectorized", "outer"]): obj = super().__new__(cls, iterable) - obj.indexer_type = indexer_type + obj.indexer_type = indexer_type # type: ignore[attr-defined] return obj def __repr__(self): @@ -1088,9 +1088,9 @@ def decompose_indexer( if isinstance(indexer, CompatIndexedTuple): # recreate the indexer object from the tuple and the type of indexing. # This is necessary to ensure that the backend array can correctly interpret the indexing operation. 
- if indexer.indexer_type == "vectorized": + if indexer.indexer_type == "vectorized": # type: ignore[attr-defined] indexer = VectorizedIndexer(indexer) - elif indexer.indexer_type == "outer": + elif indexer.indexer_type == "outer": # type: ignore[attr-defined] indexer = OuterIndexer(indexer) else: indexer = BasicIndexer(indexer) From cdd605f39a814e342b37bb91630baa09fd2badcd Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 04:45:23 -0700 Subject: [PATCH 10/21] fix docstrings --- xarray/coding/variables.py | 2 +- xarray/core/indexing.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..7b5621cceb3 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -100,7 +100,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): dtype('int16') >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> NativeEndiannessArray(x)[indexer].dtype + >>> NativeEndiannessArray(x)[indexer.tuple].dtype dtype('int16') """ diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index b2397b80848..d8847a7c538 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1162,10 +1162,10 @@ def _decompose_vectorized_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3]))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array).oindex[backend_indexer] + ... array = NumpyIndexingAdapter(array).oindex[backend_indexer.tuple] >>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # vectorized indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).vindex[np_indexer] + ... NumpyIndexingAdapter(array).vindex[np_indexer.tuple] array([ 2, 21, 8]) """ assert isinstance(indexer, VectorizedIndexer) @@ -1244,10 +1244,10 @@ def _decompose_outer_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = BasicIndexer((slice(0, 3), slice(2, 4))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array)[backend_indexer] + ... array = NumpyIndexingAdapter(array)[backend_indexer.tuple] >>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # outer indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).oindex[np_indexer] + ... 
NumpyIndexingAdapter(array).oindex[np_indexer.tuple] array([[ 2, 3, 2], [14, 15, 14], [ 8, 9, 8]]) From f35a72bc1c9ba0eea410812ced7b4e7f1a917236 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 04:59:48 -0700 Subject: [PATCH 11/21] fix tests --- xarray/tests/test_dataset.py | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 301596e032f..75ae4d67574 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -244,7 +244,7 @@ def get_array(self): return self.array def __getitem__(self, key): - return self.array[key.tuple] + return self.array[(key if isinstance(key, tuple) else key.tuple)] class AccessibleAsDuckArrayDataStore(backends.InMemoryDataStore): @@ -5070,28 +5070,26 @@ def test_lazy_load(self) -> None: ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - def test_lazy_load_duck_array(self) -> None: + @pytest.mark.parametrize("decode_cf", [True, False]) + def test_lazy_load_duck_array(self, decode_cf) -> None: store = AccessibleAsDuckArrayDataStore() create_test_data().dump_to_store(store) - for decode_cf in [True, False]: - ds = open_dataset(store, decode_cf=decode_cf) - with pytest.raises(UnexpectedDataAccess): - ds["var1"].values + ds = open_dataset(store, decode_cf=decode_cf) + with pytest.raises(UnexpectedDataAccess): + ds["var1"].values - # these should not raise UnexpectedDataAccess: - ds.var1.data - ds.isel(time=10) - ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - repr(ds) + # these should not raise UnexpectedDataAccess: + ds.var1.data + ds.isel(time=10) + ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) + repr(ds) - # preserve the duck array type and don't cast to array - assert isinstance(ds["var1"].load().data, DuckArrayWrapper) - assert isinstance( - ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper - ) + # preserve the duck array type and don't cast to array + assert isinstance(ds["var1"].load().data, DuckArrayWrapper) + assert isinstance(ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper) - ds.close() + ds.close() def test_dropna(self) -> None: x = np.random.randn(4, 4) From e646bbfcf5e3ec7e348495b19469ea9a7c37c135 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 05:05:13 -0700 Subject: [PATCH 12/21] fix docstrings --- xarray/coding/strings.py | 2 +- xarray/coding/variables.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index cafaaa19f13..e8b63408330 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -222,7 +222,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): values, when accessed, are automatically stacked along the last dimension. 
>>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer] + >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer.tuple] array(b'abc', dtype='|S3') """ diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 7b5621cceb3..6ce497dc952 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -138,7 +138,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): dtype('bool') >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> BoolTypeArray(x)[indexer].dtype + >>> BoolTypeArray(x)[indexer.tuple].dtype dtype('bool') """ From 2b273ab97569e59120437d6aee2397bdf7bf3c2d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:13:46 -0700 Subject: [PATCH 13/21] Apply suggestions from code review Co-authored-by: Deepak Cherian --- xarray/coding/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index e8b63408330..2e55ccdca14 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -222,7 +222,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): values, when accessed, are automatically stacked along the last dimension. >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer.tuple] + >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[slice(None)] array(b'abc', dtype='|S3') """ From 237bbe21a0e9587ecaaa0c325ae3cc4741787dae Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 30 Apr 2024 14:23:58 -0700 Subject: [PATCH 14/21] update docstrings and pass tuples directly --- xarray/coding/strings.py | 3 +-- xarray/coding/variables.py | 3 +-- xarray/core/indexing.py | 5 ++--- xarray/tests/test_coding_strings.py | 4 +--- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 2e55ccdca14..72b4c0b4bfd 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -221,8 +221,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. 
- >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[slice(None)] + >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[(slice(None),)] array(b'abc', dtype='|S3') """ diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 6ce497dc952..c0ca71b5d7c 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -99,8 +99,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> NativeEndiannessArray(x).dtype dtype('int16') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> NativeEndiannessArray(x)[indexer.tuple].dtype + >>> NativeEndiannessArray(x)[(slice(None),)].dtype dtype('int16') """ diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index d8847a7c538..ae31e66d528 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -516,8 +516,7 @@ class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () def get_duck_array(self): - key = BasicIndexer((slice(None),) * self.ndim) - return self[key.tuple] + return self[(slice(None),) * self.ndim] def __array__(self, dtype: np.typing.DTypeLike = None) -> np.ndarray: # This is necessary because we apply the indexing key in self.get_duck_array() @@ -1752,7 +1751,7 @@ def _convert_scalar(self, item): # a NumPy array. return to_0d_array(item) - def _prepare_key(self, key: _IndexerKey) -> _IndexerKey: + def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: _key = key.tuple if isinstance(key, ExplicitIndexer) else key if isinstance(_key, tuple) and len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index dd1fe458df4..a0db7471e11 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -179,9 +179,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: stacked = strings.StackedBytesArray(array) expected = np.array([[b"abc", b"def"], [b"def", b"abc"]]) - V = IndexerMaker(indexing.VectorizedIndexer) - indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked.vindex[indexer.tuple] + actual = stacked.vindex[(np.array([[0, 1], [1, 0]]),)] assert_array_equal(actual, expected) From a91780737ebbf4f5ceede73d0ac3945bf427a0c0 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 30 Apr 2024 17:05:04 -0600 Subject: [PATCH 15/21] Some test cleanup --- xarray/tests/test_coding_strings.py | 5 ++--- xarray/tests/test_indexing.py | 17 +++++++---------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index a0db7471e11..30d58434a55 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -150,10 +150,9 @@ def test_StackedBytesArray() -> None: assert len(actual) == len(expected) assert_array_equal(expected, actual) - B = IndexerMaker(indexing.BasicIndexer) - assert_array_equal(expected[:1], actual[B[:1].tuple]) + assert_array_equal(expected[:1], actual[slice(1)]) with pytest.raises(IndexError): - actual[B[:, :2].tuple] + actual[slice(None), slice(2)] def test_StackedBytesArray_scalar() -> None: diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index fd1997eb05a..b5da4a75439 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -12,7 +12,6 @@ from xarray.core.indexes import PandasIndex, PandasMultiIndex from xarray.core.types import 
T_Xarray from xarray.tests import ( - IndexerMaker, ReturnItem, assert_array_equal, assert_identical, @@ -20,8 +19,6 @@ requires_dask, ) -B = IndexerMaker(indexing.BasicIndexer) - class TestIndexCallable: def test_getitem(self): @@ -461,16 +458,16 @@ class TestCopyOnWriteArray: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - wrapped[B[:].tuple] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.zeros(10)) def test_sub_array(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - child = wrapped[B[:5].tuple] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.CopyOnWriteArray) - child[B[:].tuple] = 0 + child[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.arange(10)) assert_array_equal(child, np.zeros(5)) @@ -478,7 +475,7 @@ def test_sub_array(self) -> None: def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(["foo", "bar"])) - assert np.array(x[B[0].tuple][B[()].tuple]) == "foo" + assert np.array(x[(0,)][()]) == "foo" class TestMemoryCachedArray: @@ -491,7 +488,7 @@ def test_wrapper(self) -> None: def test_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) - child = wrapped[B[:5].tuple] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) @@ -500,13 +497,13 @@ def test_sub_array(self) -> None: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) - wrapped[B[:].tuple] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.zeros(10)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(["foo", "bar"])) - assert np.array(x[B[0].tuple][B[()].tuple]) == "foo" + assert np.array(x[(0,)][()]) == "foo" def test_base_explicit_indexer() -> None: From bae5fbd67f99b2bea34743f5a25bc417e6892365 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 2 May 2024 05:55:51 -0700 Subject: [PATCH 16/21] update docstring --- xarray/coding/variables.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index c0ca71b5d7c..98bbbbaeb2c 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -136,8 +136,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> BoolTypeArray(x).dtype dtype('bool') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> BoolTypeArray(x)[indexer.tuple].dtype + >>> BoolTypeArray(x)[(slice(None),)].dtype dtype('bool') """ From 203849a6356a019dc964b353912439542b154283 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 2 May 2024 05:58:40 -0700 Subject: [PATCH 17/21] use `BasicIndexer` instead of `CompatIndexedTuple` --- xarray/coding/strings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 72b4c0b4bfd..6df92c256b9 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -256,10 +256,10 @@ def _oindex_get(self, key: _IndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) def __getitem__(self, key: _IndexerKey): - from xarray.core.indexing import CompatIndexedTuple + from xarray.core.indexing 
import BasicIndexer # require slicing the last dimension completely indexer = indexing.expanded_indexer(key, self.array.ndim) if indexer[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[CompatIndexedTuple(indexer, "basic")]) + return _numpy_char_to_bytes(self.array[BasicIndexer(indexer)]) From ab0db231c9d8528832151bb93eb10acf34e6f78a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 2 May 2024 06:04:42 -0700 Subject: [PATCH 18/21] support explicit indexing with tuples --- xarray/core/indexing.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index ae31e66d528..edd33079dab 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1558,13 +1558,17 @@ def _vindex_get(self, indexer: _IndexerKey): array = NumpyVIndexAdapter(self.array) return array[indexer] - def __getitem__(self, indexer: _IndexerKey): + def __getitem__(self, indexer: _IndexerKey | ExplicitIndexer): array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). - key = indexer + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) return array[key] def _safe_setitem(self, array, key: _IndexerKey, value: Any) -> None: @@ -1588,12 +1592,16 @@ def _vindex_set(self, indexer: _IndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: _IndexerKey, value: Any) -> None: + def __setitem__(self, indexer: _IndexerKey | ExplicitIndexer, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). 
- key = indexer + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) self._safe_setitem(array, key, value) From 8f74b18ac170536db65a84c24d371f3c642a212d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 2 May 2024 06:11:57 -0700 Subject: [PATCH 19/21] fix mypy errors --- xarray/tests/test_coding_strings.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 30d58434a55..0feac5b15eb 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -7,9 +7,7 @@ from xarray import Variable from xarray.coding import strings -from xarray.core import indexing from xarray.tests import ( - IndexerMaker, assert_array_equal, assert_identical, requires_dask, @@ -150,7 +148,7 @@ def test_StackedBytesArray() -> None: assert len(actual) == len(expected) assert_array_equal(expected, actual) - assert_array_equal(expected[:1], actual[slice(1)]) + assert_array_equal(expected[:1], actual[(slice(1),)]) with pytest.raises(IndexError): actual[slice(None), slice(2)] @@ -167,10 +165,8 @@ def test_StackedBytesArray_scalar() -> None: with pytest.raises(TypeError): len(actual) np.testing.assert_array_equal(expected, actual) - - B = IndexerMaker(indexing.BasicIndexer) with pytest.raises(IndexError): - actual[B[:2].tuple] + actual[(slice(2),)] def test_StackedBytesArray_vectorized_indexing() -> None: From 3ee14aa66e5376c4ec3f80057d19c4793110e9e8 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 2 May 2024 06:12:26 -0700 Subject: [PATCH 20/21] remove unused IndexerMaker --- xarray/tests/__init__.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index c202e191293..f2d95ece8fb 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -264,16 +264,6 @@ def __getitem__(self, key): return key -class IndexerMaker: - def __init__(self, indexer_cls): - self._indexer_cls = indexer_cls - - def __getitem__(self, key): - if not isinstance(key, tuple): - key = (key,) - return self._indexer_cls(key) - - def source_ndarray(array): """Given an ndarray, return the base object which holds its memory, or the object itself. 
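The remaining patch in the series applies the same idea to LazilyIndexedArray._updated_key: like the adapters touched above, it accepts either a plain key tuple or an ExplicitIndexer and reduces the wrapper to its .tuple before doing any work. A minimal sketch of that normalization pattern follows; the class names here are illustrative stand-ins (not xarray's real internals), and the only assumption is NumPy:

    import numpy as np


    class ExplicitIndexerSketch:
        # Stand-in for xarray's ExplicitIndexer: a thin wrapper exposing a .tuple attribute.
        def __init__(self, key):
            self.tuple = tuple(key)


    class TupleFriendlyAdapter:
        # Indexing adapter that takes a raw tuple key but still unwraps an explicit indexer.
        def __init__(self, array):
            self.array = array

        def __getitem__(self, key):
            # Normalize: unwrap .tuple when given a wrapper, otherwise use the tuple as-is.
            key = key.tuple if isinstance(key, ExplicitIndexerSketch) else key
            return self.array[key]


    adapter = TupleFriendlyAdapter(np.arange(12).reshape(3, 4))
    # Both forms select the same column once the key is reduced to a plain tuple.
    assert np.array_equal(
        adapter[(slice(None), 0)],
        adapter[ExplicitIndexerSketch((slice(None), 0))],
    )

Either form reduces to the same tuple before it reaches the underlying array, which is why the test updates earlier in the series can drop IndexerMaker and index with plain tuples directly.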
From 303b2e6307df12afa8074e712d63e96b2471327a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 2 May 2024 06:44:57 -0700 Subject: [PATCH 21/21] Update LazilyIndexedArray._updated_key to support explicit indexing with tuples --- xarray/core/indexing.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index edd33079dab..2b8cd202e4e 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -622,8 +622,13 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): self.array = as_indexable(array) self.key = key - def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: - iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) + def _updated_key( + self, new_key: ExplicitIndexer | _IndexerKey + ) -> BasicIndexer | OuterIndexer: + _new_key_tuple = ( + new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key + ) + iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) full_key = [] for size, k in zip(self.array.shape, self.key.tuple): if isinstance(k, integer_types): @@ -673,17 +678,14 @@ def transpose(self, order): return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) def _oindex_get(self, indexer: _IndexerKey): - return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) + return type(self)(self.array, self._updated_key(indexer)) def _vindex_get(self, indexer: _IndexerKey): array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] def __getitem__(self, indexer: _IndexerKey): - key = BasicIndexer( - indexer.tuple if isinstance(indexer, ExplicitIndexer) else indexer - ) - return type(self)(self.array, self._updated_key(key)) + return type(self)(self.array, self._updated_key(indexer)) def _vindex_set(self, key: _IndexerKey, value: Any) -> None: raise NotImplementedError(