Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate tuples of chunks? #8341

Merged
merged 6 commits into from
Oct 21, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ Breaking changes
Deprecations
~~~~~~~~~~~~

- Supplying dimension-ordered sequences to :py:meth:`DataArray.chunk` &
:py:meth:`Dataset.chunk` is deprecated in favor of supplying a dictionary of
dimensions, or a single ``int`` or ``"auto"`` argument covering all
dimensions. Xarray favors using dimension names rather than positions, and
this was one place in the API where dimension positions were used.
(:pull:`8341`)
By `Maximilian Roos <https://github.com/max-sixty>`_.

Bug fixes
~~~~~~~~~
Expand Down
5 changes: 5 additions & 0 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,11 @@ def chunk(
# ignoring type; unclear why it won't accept a Literal into the value.
chunks = dict.fromkeys(self.dims, chunks)
elif isinstance(chunks, (tuple, list)):
utils.emit_user_level_warning(
"Supplying chunks as dimension-order tuples is deprecated. "
"It will raise an error in the future. Instead use a dict with dimensions as keys.",
category=DeprecationWarning,
)
chunks = dict(zip(self.dims, chunks))
else:
chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")
Expand Down
8 changes: 7 additions & 1 deletion xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2647,11 +2647,17 @@ def chunk(
warnings.warn(
"None value for 'chunks' is deprecated. "
"It will raise an error in the future. Use instead '{}'",
category=FutureWarning,
category=DeprecationWarning,
)
chunks = {}
chunks_mapping: Mapping[Any, Any]
if not isinstance(chunks, Mapping) and chunks is not None:
if isinstance(chunks, (tuple, list)):
utils.emit_user_level_warning(
"Supplying chunks as dimension-order tuples is deprecated. "
"It will raise an error in the future. Instead use a dict with dimensions as keys.",
category=DeprecationWarning,
)
chunks_mapping = dict.fromkeys(self.dims, chunks)
else:
chunks_mapping = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")
Expand Down
4 changes: 1 addition & 3 deletions xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,7 @@ def copy(
# FYI in some cases we don't allow `None`, which this doesn't take account of.
T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]]
# We allow the tuple form of this (though arguably we could transition to named dims only)
T_Chunks: TypeAlias = Union[
T_ChunkDim, Mapping[Any, T_ChunkDim], tuple[T_ChunkDim, ...]
]
T_Chunks: TypeAlias = Union[T_ChunkDim, Mapping[Any, T_ChunkDim]]
T_NormalizedChunks = tuple[tuple[int, ...], ...]

DataVars = Mapping[Any, Any]
Expand Down
11 changes: 3 additions & 8 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from collections.abc import Hashable, Mapping, Sequence
from datetime import timedelta
from functools import partial
from typing import TYPE_CHECKING, Any, Callable, Literal, NoReturn, cast
from typing import TYPE_CHECKING, Any, Callable, NoReturn, cast

import numpy as np
import pandas as pd
Expand All @@ -34,6 +34,7 @@
is_duck_dask_array,
to_numpy,
)
from xarray.core.types import T_Chunks
from xarray.core.utils import (
OrderedSet,
_default,
Expand Down Expand Up @@ -965,13 +966,7 @@ def _replace(

def chunk(
self,
chunks: (
int
| Literal["auto"]
| tuple[int, ...]
| tuple[tuple[int, ...], ...]
| Mapping[Any, None | int | tuple[int, ...]]
) = {},
chunks: T_Chunks = {},
name: str | None = None,
lock: bool | None = None,
inline_array: bool | None = None,
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2798,7 +2798,7 @@ def test_write_empty(self, write_empty: bool) -> None:
)

if has_dask:
ds["test"] = ds["test"].chunk((1, 1, 1))
ds["test"] = ds["test"].chunk(1)
encoding = None
else:
encoding = {"test": {"chunks": (1, 1, 1)}}
Expand Down
13 changes: 7 additions & 6 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -879,13 +879,14 @@ def test_chunk(self) -> None:
assert blocked.chunks == ((3,), (4,))
first_dask_name = blocked.data.name

blocked = unblocked.chunk(chunks=((2, 1), (2, 2)))
assert blocked.chunks == ((2, 1), (2, 2))
assert blocked.data.name != first_dask_name
with pytest.warns(DeprecationWarning):
blocked = unblocked.chunk(chunks=((2, 1), (2, 2))) # type: ignore
assert blocked.chunks == ((2, 1), (2, 2))
assert blocked.data.name != first_dask_name

blocked = unblocked.chunk(chunks=(3, 3))
assert blocked.chunks == ((3,), (3, 1))
assert blocked.data.name != first_dask_name
blocked = unblocked.chunk(chunks=(3, 3))
assert blocked.chunks == ((3,), (3, 1))
assert blocked.data.name != first_dask_name

# name doesn't change when rechunking by same amount
# this fails if ReprObject doesn't have __dask_tokenize__ defined
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -2663,7 +2663,7 @@ def test_full_like(self) -> None:
def test_full_like_dask(self) -> None:
orig = Variable(
dims=("x", "y"), data=[[1.5, 2.0], [3.1, 4.3]], attrs={"foo": "bar"}
).chunk(((1, 1), (2,)))
).chunk(dict(x=(1, 1), y=(2,)))

def check(actual, expect_dtype, expect_values):
assert actual.dtype == expect_dtype
Expand Down