From c788ee44008cdd65c8b6de40c737f1b28e173496 Mon Sep 17 00:00:00 2001 From: Ray Bell Date: Thu, 23 Apr 2020 03:58:09 -0400 Subject: [PATCH 01/71] DOC: add pandas.DataFrame.to_xarray (#3994) Co-authored-by: Ray Bell --- xarray/core/dataset.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d811d54847f..53aa00f22ce 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4598,6 +4598,7 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> "Datas See also -------- xarray.DataArray.from_series + pandas.DataFrame.to_xarray """ # TODO: Add an option to remove dimensions along which the variables # are constant, to enable consistent serialization to/from a dataframe, From 37551da5ebc7861439ac3eefddefb534b76f2895 Mon Sep 17 00:00:00 2001 From: Prajjwal Nijhara Date: Fri, 24 Apr 2020 12:44:54 +0530 Subject: [PATCH 02/71] Fix some code quality and bug-risk issues (#3999) --- .deepsource.toml | 18 ++++++++++++++++++ xarray/convert.py | 6 +++--- xarray/core/computation.py | 4 ++-- xarray/core/formatting.py | 12 +++++------- xarray/core/groupby.py | 4 +++- xarray/plot/plot.py | 2 +- 6 files changed, 32 insertions(+), 14 deletions(-) create mode 100644 .deepsource.toml diff --git a/.deepsource.toml b/.deepsource.toml new file mode 100644 index 00000000000..e37b41de303 --- /dev/null +++ b/.deepsource.toml @@ -0,0 +1,18 @@ +version = 1 + +test_patterns = [ + "*/tests/**", + "*/test_*.py" +] + +exclude_patterns = [ + "doc/**", + "ci/**" +] + +[[analyzers]] +name = "python" +enabled = true + + [analyzers.meta] + runtime_version = "3.x.x" \ No newline at end of file diff --git a/xarray/convert.py b/xarray/convert.py index 4974a55d8e2..0c86b090f34 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -229,11 +229,11 @@ def _iris_cell_methods_to_str(cell_methods_obj): """ cell_methods = [] for cell_method in cell_methods_obj: - names = "".join([f"{n}: " for n in cell_method.coord_names]) + names = "".join(f"{n}: " for n in cell_method.coord_names) intervals = " ".join( - [f"interval: {interval}" for interval in cell_method.intervals] + f"interval: {interval}" for interval in cell_method.intervals ) - comments = " ".join([f"comment: {comment}" for comment in cell_method.comments]) + comments = " ".join(f"comment: {comment}" for comment in cell_method.comments) extra = " ".join([intervals, comments]).strip() if extra: extra = f" ({extra})" diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 6cf4178b5bf..a3723ea9db9 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1192,10 +1192,10 @@ def dot(*arrays, dims=None, **kwargs): # construct einsum subscripts, such as '...abc,...ab->...c' # Note: input_core_dims are always moved to the last position subscripts_list = [ - "..." + "".join([dim_map[d] for d in ds]) for ds in input_core_dims + "..." + "".join(dim_map[d] for d in ds) for ds in input_core_dims ] subscripts = ",".join(subscripts_list) - subscripts += "->..." + "".join([dim_map[d] for d in output_core_dims[0]]) + subscripts += "->..." 
+ "".join(dim_map[d] for d in output_core_dims[0]) join = OPTIONS["arithmetic_join"] # using "inner" emulates `(a * b).sum()` for all joins (except "exact") diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 534d253ecc8..d6732fc182e 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -298,12 +298,10 @@ def _summarize_coord_multiindex(coord, col_width, marker): def _summarize_coord_levels(coord, col_width, marker="-"): return "\n".join( - [ - summarize_variable( - lname, coord.get_level_variable(lname), col_width, marker=marker - ) - for lname in coord.level_names - ] + summarize_variable( + lname, coord.get_level_variable(lname), col_width, marker=marker + ) + for lname in coord.level_names ) @@ -562,7 +560,7 @@ def extra_items_repr(extra_keys, mapping, ab_side): for m in (a_mapping, b_mapping): attr_s = "\n".join( - [summarize_attr(ak, av) for ak, av in m[k].attrs.items()] + summarize_attr(ak, av) for ak, av in m[k].attrs.items() ) attrs_summary.append(attr_s) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 5a5f4c0d296..148e16863d1 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -273,7 +273,7 @@ def __init__( grouper=None, bins=None, restore_coord_dims=None, - cut_kwargs={}, + cut_kwargs=None, ): """Create a GroupBy object @@ -299,6 +299,8 @@ def __init__( Extra keyword arguments to pass to `pandas.cut` """ + if cut_kwargs is None: + cut_kwargs = {} from .dataarray import DataArray if grouper is not None and bins is not None: diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 4657bee9415..4d6033bf00d 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -30,7 +30,7 @@ def _infer_line_data(darray, x, y, hue): error_msg = "must be either None or one of ({:s})".format( - ", ".join([repr(dd) for dd in darray.dims]) + ", ".join(repr(dd) for dd in darray.dims) ) ndims = len(darray.dims) From 6ca3bd7148748fbf03d3ede653a83287f852e472 Mon Sep 17 00:00:00 2001 From: Huite Date: Fri, 24 Apr 2020 09:15:43 +0200 Subject: [PATCH 03/71] full_like: error on non-scalar fill_value (#3979) * Avoid multiplication DeprecationWarning in rasterio backend * full_like: error on non-scalar fill_value Fixes #3977 * Added test * Updated what's new * core.utils.is_scalar instead of numpy.is_scalar * More informative error message * raises_regex for error test --- doc/whats-new.rst | 2 ++ xarray/core/common.py | 5 ++++- xarray/tests/test_variable.py | 4 ++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fc95e26dabd..7b2b3530c41 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -58,6 +58,8 @@ New Features Bug fixes ~~~~~~~~~ +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue`3977`) + By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) By `Keisuke Fujii `_. - Fix renaming of coords when one or more stacked coords is not in diff --git a/xarray/core/common.py b/xarray/core/common.py index 8f6d57e9f12..1e7069ec51f 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -25,7 +25,7 @@ from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp -from .utils import Frozen, either_dict_or_kwargs +from .utils import Frozen, either_dict_or_kwargs, is_scalar # Used as a sentinel value to indicate a all dimensions ALL_DIMS = ... 
@@ -1397,6 +1397,9 @@ def full_like(other, fill_value, dtype: DTypeLike = None): from .dataset import Dataset from .variable import Variable + if not is_scalar(fill_value): + raise ValueError(f"fill_value must be scalar. Received {fill_value} instead.") + if isinstance(other, Dataset): data_vars = { k: _full_like_variable(v, fill_value, dtype) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 78e3848b8fb..3003e0d66f3 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -2213,6 +2213,10 @@ def test_full_like(self): assert expect.dtype == bool assert_identical(expect, full_like(orig, True, dtype=bool)) + # raise error on non-scalar fill_value + with raises_regex(ValueError, "must be scalar"): + full_like(orig, [1.0, 2.0]) + @requires_dask def test_full_like_dask(self): orig = Variable( From 33a66d6380c26a59923922ee11e8ffcf0b4f379f Mon Sep 17 00:00:00 2001 From: Ryan May Date: Fri, 24 Apr 2020 01:16:09 -0600 Subject: [PATCH 04/71] Fix handling of abbreviated units like msec (#3998) * Fix handling of abbreviated units like msec By default, xarray tries to decode times with pandas and falls back to cftime. This fixes the exception handler to fallback properly in the cases an unhandled abbreviated unit is passed in. * Add what's new entry --- doc/whats-new.rst | 4 +++- xarray/coding/times.py | 2 +- xarray/tests/test_coding_times.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7b2b3530c41..6fc3260f10d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -82,7 +82,9 @@ Bug fixes - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, and added `keep_attrs` argument. (:issue:`3968`) By `Tom Nicholas `_. - +- Fix bug in time parsing failing to fall back to cftime. This was causing time + variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) + By `Ryan May `_. 
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 965ddd8f043..d923f1ad088 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -155,7 +155,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): if use_cftime is None: try: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) - except (OutOfBoundsDatetime, OverflowError): + except (KeyError, OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_cftime( flat_num_dates.astype(np.float), units, calendar ) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 00c34940ce4..1efd4b02bf8 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -432,6 +432,18 @@ def test_decode_360_day_calendar(): assert_array_equal(actual, expected) +@requires_cftime +def test_decode_abbreviation(): + """Test making sure we properly fall back to cftime on abbreviated units.""" + import cftime + + val = np.array([1586628000000.0]) + units = "msecs since 1970-01-01T00:00:00Z" + actual = coding.times.decode_cf_datetime(val, units) + expected = coding.times.cftime_to_nptime(cftime.num2date(val, units)) + assert_array_equal(actual, expected) + + @arm_xfail @requires_cftime @pytest.mark.parametrize( From 4e196f74dccabbc82f43df7806dc0c7810ba526a Mon Sep 17 00:00:00 2001 From: arabidopsis Date: Wed, 29 Apr 2020 23:54:22 +0800 Subject: [PATCH 05/71] ensure Variable._repr_html_ works (#3973) * ensure Variable._repr_html_ works * added PR 3972 to Bug fixes * better attribute access * moved Varible._repr_html_ test to better location Co-authored-by: Stephan Hoyer Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 3 +++ xarray/core/formatting_html.py | 3 ++- xarray/tests/test_formatting_html.py | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6fc3260f10d..b71e0baa655 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -79,6 +79,9 @@ Bug fixes By `Tom Nicholas `_. - Fix ``RasterioDeprecationWarning`` when using a ``vrt`` in ``open_rasterio``. (:issue:`3964`) By `Taher Chegini `_. +- Fix ``AttributeError`` on displaying a :py:class:`Variable` + in a notebook context. (:issue:`3972`, :pull:`3973`) + By `Ian Castleden `_. - Fix bug causing :py:meth:`DataArray.interpolate_na` to always drop attributes, and added `keep_attrs` argument. (:issue:`3968`) By `Tom Nicholas `_. 
diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 8678a58b381..6e345582ed0 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -183,7 +183,8 @@ def array_section(obj): # "unique" id to expand/collapse the section data_id = "section-" + str(uuid.uuid4()) collapsed = "" - preview = escape(inline_variable_array_repr(obj.variable, max_width=70)) + variable = getattr(obj, "variable", obj) + preview = escape(inline_variable_array_repr(variable, max_width=70)) data_repr = short_data_repr_html(obj) data_icon = _icon("icon-database") diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 239f339208d..94653016416 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -137,3 +137,15 @@ def test_repr_of_dataset(dataset): ) assert "<U4" in formatted or ">U4" in formatted assert "<IA>" in formatted + + +def test_variable_repr_html(): + v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"}) + assert hasattr(v, "_repr_html_") + with xr.set_options(display_style="html"): + html = v._repr_html_().strip() + # We don't do a complete string identity since + # html output is probably subject to change, is long and... reasons. + # Just test that something reasonable was produced. + assert html.startswith("") + assert "xarray.Variable" in html From 8834afa9f617bd201eba00374bb55d96dccec96b Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 29 Apr 2020 18:10:09 +0200 Subject: [PATCH 06/71] Apply blackdoc to the documentation (#4012) * replace tabs with spaces * fix some invalid code * add missing prompts * apply blackdoc * reformat the plotting docs code * whats-new.rst entry --- doc/combining.rst | 62 ++++++------ doc/computation.rst | 165 +++++++++++++++++--------------- doc/contributing.rst | 28 +++--- doc/dask.rst | 66 ++++++++----- doc/data-structures.rst | 144 +++++++++++++++------------- doc/faq.rst | 9 +- doc/groupby.rst | 56 ++++++----- doc/indexing.rst | 99 +++++++++---------- doc/internals.rst | 21 +++-- doc/interpolation.rst | 118 ++++++++++++----------- doc/io.rst | 116 ++++++++++++++--------- doc/pandas.rst | 36 ++++--- doc/plotting.rst | 151 ++++++++++++++++------------- doc/quick-overview.rst | 39 ++++---- doc/reshaping.rst | 112 +++++++++++----------- doc/time-series.rst | 54 ++++++----- doc/weather-climate.rst | 72 +++++++------- doc/whats-new.rst | 199 ++++++++++++++++++++------------------- xarray/core/common.py | 4 +- xarray/core/dataarray.py | 55 ++++++----- xarray/core/dataset.py | 66 +++++++------ 21 files changed, 917 insertions(+), 755 deletions(-) diff --git a/doc/combining.rst b/doc/combining.rst index 05b7f2efc50..ffc6575c579 100644 --- a/doc/combining.rst +++ b/doc/combining.rst @@ -4,11 +4,12 @@ Combining data -------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) * For combining datasets or data arrays along a single dimension, see concatenate_. @@ -28,11 +29,10 @@ that dimension: .. 
ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray(np.random.randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])]) arr[:, :1] # this resembles how you would use np.concatenate - xr.concat([arr[:, :1], arr[:, 1:]], dim='y') + xr.concat([arr[:, :1], arr[:, 1:]], dim="y") In addition to combining along an existing dimension, ``concat`` can create a new dimension by stacking lower dimensional arrays together: @@ -41,7 +41,7 @@ new dimension by stacking lower dimensional arrays together: arr[0] # to combine these 1d arrays into a 2d array in numpy, you would use np.array - xr.concat([arr[0], arr[1]], 'x') + xr.concat([arr[0], arr[1]], "x") If the second argument to ``concat`` is a new dimension name, the arrays will be concatenated along that new dimension, which is always inserted as the first @@ -49,7 +49,7 @@ dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], 'new_dim') + xr.concat([arr[0], arr[1]], "new_dim") The second argument to ``concat`` can also be an :py:class:`~pandas.Index` or :py:class:`~xarray.DataArray` object as well as a string, in which case it is @@ -57,14 +57,14 @@ used to label the values along the new dimension: .. ipython:: python - xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name='new_dim')) + xr.concat([arr[0], arr[1]], pd.Index([-90, -100], name="new_dim")) Of course, ``concat`` also works on ``Dataset`` objects: .. ipython:: python - ds = arr.to_dataset(name='foo') - xr.concat([ds.sel(x='a'), ds.sel(x='b')], 'x') + ds = arr.to_dataset(name="foo") + xr.concat([ds.sel(x="a"), ds.sel(x="b")], "x") :py:func:`~xarray.concat` has a number of options which provide deeper control over which variables are concatenated and how it handles conflicting variables @@ -84,8 +84,8 @@ To combine variables and coordinates between multiple ``DataArray`` and/or .. ipython:: python - xr.merge([ds, ds.rename({'foo': 'bar'})]) - xr.merge([xr.DataArray(n, name='var%d' % n) for n in range(5)]) + xr.merge([ds, ds.rename({"foo": "bar"})]) + xr.merge([xr.DataArray(n, name="var%d" % n) for n in range(5)]) If you merge another dataset (or a dictionary including data array objects), by default the resulting dataset will be aligned on the **union** of all index @@ -93,7 +93,7 @@ coordinates: .. ipython:: python - other = xr.Dataset({'bar': ('x', [1, 2, 3, 4]), 'x': list('abcd')}) + other = xr.Dataset({"bar": ("x", [1, 2, 3, 4]), "x": list("abcd")}) xr.merge([ds, other]) This ensures that ``merge`` is non-destructive. ``xarray.MergeError`` is raised @@ -116,7 +116,7 @@ used in the :py:class:`~xarray.Dataset` constructor: .. ipython:: python - xr.Dataset({'a': arr[:-1], 'b': arr[1:]}) + xr.Dataset({"a": arr[:-1], "b": arr[1:]}) .. _combine: @@ -131,8 +131,8 @@ are filled with ``NaN``. For example: .. ipython:: python - ar0 = xr.DataArray([[0, 0], [0, 0]], [('x', ['a', 'b']), ('y', [-1, 0])]) - ar1 = xr.DataArray([[1, 1], [1, 1]], [('x', ['b', 'c']), ('y', [0, 1])]) + ar0 = xr.DataArray([[0, 0], [0, 0]], [("x", ["a", "b"]), ("y", [-1, 0])]) + ar1 = xr.DataArray([[1, 1], [1, 1]], [("x", ["b", "c"]), ("y", [0, 1])]) ar0.combine_first(ar1) ar1.combine_first(ar0) @@ -152,7 +152,7 @@ variables with new values: .. ipython:: python - ds.update({'space': ('space', [10.2, 9.4, 3.9])}) + ds.update({"space": ("space", [10.2, 9.4, 3.9])}) However, dimensions are still required to be consistent between different Dataset variables, so you cannot change the size of a dimension unless you @@ -170,7 +170,7 @@ syntax: .. 
ipython:: python - ds['baz'] = xr.DataArray([9, 9, 9, 9, 9], coords=[('x', list('abcde'))]) + ds["baz"] = xr.DataArray([9, 9, 9, 9, 9], coords=[("x", list("abcde"))]) ds.baz Equals and identical @@ -193,7 +193,7 @@ object: .. ipython:: python - arr.identical(arr.rename('bar')) + arr.identical(arr.rename("bar")) :py:attr:`~xarray.Dataset.broadcast_equals` does a more relaxed form of equality check that allows variables to have different dimensions, as long as values @@ -201,8 +201,8 @@ are constant along those new dimensions: .. ipython:: python - left = xr.Dataset(coords={'x': 0}) - right = xr.Dataset({'x': [0, 0, 0]}) + left = xr.Dataset(coords={"x": 0}) + right = xr.Dataset({"x": [0, 0, 0]}) left.broadcast_equals(right) Like pandas objects, two xarray objects are still equal or identical if they have @@ -231,9 +231,9 @@ coordinates as long as any non-missing values agree or are disjoint: .. ipython:: python - ds1 = xr.Dataset({'a': ('x', [10, 20, 30, np.nan])}, {'x': [1, 2, 3, 4]}) - ds2 = xr.Dataset({'a': ('x', [np.nan, 30, 40, 50])}, {'x': [2, 3, 4, 5]}) - xr.merge([ds1, ds2], compat='no_conflicts') + ds1 = xr.Dataset({"a": ("x", [10, 20, 30, np.nan])}, {"x": [1, 2, 3, 4]}) + ds2 = xr.Dataset({"a": ("x", [np.nan, 30, 40, 50])}, {"x": [2, 3, 4, 5]}) + xr.merge([ds1, ds2], compat="no_conflicts") Note that due to the underlying representation of missing values as floating point numbers (``NaN``), variable data type is not always preserved when merging @@ -273,10 +273,12 @@ datasets into a doubly-nested list, e.g: .. ipython:: python - arr = xr.DataArray(name='temperature', data=np.random.randint(5, size=(2, 2)), dims=['x', 'y']) + arr = xr.DataArray( + name="temperature", data=np.random.randint(5, size=(2, 2)), dims=["x", "y"] + ) arr ds_grid = [[arr, arr], [arr, arr]] - xr.combine_nested(ds_grid, concat_dim=['x', 'y']) + xr.combine_nested(ds_grid, concat_dim=["x", "y"]) :py:func:`~xarray.combine_nested` can also be used to explicitly merge datasets with different variables. For example if we have 4 datasets, which are divided @@ -286,10 +288,10 @@ we wish to use ``merge`` instead of ``concat``: .. ipython:: python - temp = xr.DataArray(name='temperature', data=np.random.randn(2), dims=['t']) - precip = xr.DataArray(name='precipitation', data=np.random.randn(2), dims=['t']) + temp = xr.DataArray(name="temperature", data=np.random.randn(2), dims=["t"]) + precip = xr.DataArray(name="precipitation", data=np.random.randn(2), dims=["t"]) ds_grid = [[temp, precip], [temp, precip]] - xr.combine_nested(ds_grid, concat_dim=['t', None]) + xr.combine_nested(ds_grid, concat_dim=["t", None]) :py:func:`~xarray.combine_by_coords` is for combining objects which have dimension coordinates which specify their relationship to and order relative to one @@ -302,8 +304,8 @@ coordinates, not on their position in the list passed to ``combine_by_coords``. .. 
ipython:: python :okwarning: - x1 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [0, 1, 2])]) - x2 = xr.DataArray(name='foo', data=np.random.randn(3), coords=[('x', [3, 4, 5])]) + x1 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [0, 1, 2])]) + x2 = xr.DataArray(name="foo", data=np.random.randn(3), coords=[("x", [3, 4, 5])]) xr.combine_by_coords([x2, x1]) These functions can be used by :py:func:`~xarray.open_mfdataset` to open many diff --git a/doc/computation.rst b/doc/computation.rst index 4b8014c4782..3660aed93ed 100644 --- a/doc/computation.rst +++ b/doc/computation.rst @@ -18,17 +18,19 @@ Arithmetic operations with a single DataArray automatically vectorize (like numpy) over all array values: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python - arr = xr.DataArray(np.random.RandomState(0).randn(2, 3), - [('x', ['a', 'b']), ('y', [10, 20, 30])]) + arr = xr.DataArray( + np.random.RandomState(0).randn(2, 3), [("x", ["a", "b"]), ("y", [10, 20, 30])] + ) arr - 3 abs(arr) @@ -45,7 +47,7 @@ Use :py:func:`~xarray.where` to conditionally switch between values: .. ipython:: python - xr.where(arr > 0, 'positive', 'negative') + xr.where(arr > 0, "positive", "negative") Use `@` to perform matrix multiplication: @@ -73,14 +75,14 @@ methods for working with missing data from pandas: .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x']) + x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=["x"]) x.isnull() x.notnull() x.count() - x.dropna(dim='x') + x.dropna(dim="x") x.fillna(-1) - x.ffill('x') - x.bfill('x') + x.ffill("x") + x.bfill("x") Like pandas, xarray uses the float value ``np.nan`` (not-a-number) to represent missing values. @@ -90,9 +92,12 @@ for filling missing values via 1D interpolation. .. ipython:: python - x = xr.DataArray([0, 1, np.nan, np.nan, 2], dims=['x'], - coords={'xx': xr.Variable('x', [0, 1, 1.1, 1.9, 3])}) - x.interpolate_na(dim='x', method='linear', use_coordinate='xx') + x = xr.DataArray( + [0, 1, np.nan, np.nan, 2], + dims=["x"], + coords={"xx": xr.Variable("x", [0, 1, 1.1, 1.9, 3])}, + ) + x.interpolate_na(dim="x", method="linear", use_coordinate="xx") Note that xarray slightly diverges from the pandas ``interpolate`` syntax by providing the ``use_coordinate`` keyword which facilitates a clear specification @@ -110,8 +115,8 @@ applied along particular dimension(s): .. ipython:: python - arr.sum(dim='x') - arr.std(['x', 'y']) + arr.sum(dim="x") + arr.std(["x", "y"]) arr.min() @@ -121,7 +126,7 @@ for wrapping code designed to work with numpy arrays), you can use the .. ipython:: python - arr.get_axis_num('y') + arr.get_axis_num("y") These operations automatically skip missing values, like in pandas: @@ -142,8 +147,7 @@ method supports rolling window aggregation: .. ipython:: python - arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) arr :py:meth:`~xarray.DataArray.rolling` is applied along one dimension using the @@ -194,8 +198,9 @@ We can also manually iterate through ``Rolling`` objects: .. code:: python - for label, arr_window in r: - # arr_window is a view of x + for label, arr_window in r: + # arr_window is a view of x + ... .. _comput.rolling_exp: @@ -222,9 +227,9 @@ windowed rolling, convolution, short-time FFT etc. .. 
ipython:: python # rolling with 2-point stride - rolling_da = r.construct('window_dim', stride=2) + rolling_da = r.construct("window_dim", stride=2) rolling_da - rolling_da.mean('window_dim', skipna=False) + rolling_da.mean("window_dim", skipna=False) Because the ``DataArray`` given by ``r.construct('window_dim')`` is a view of the original array, it is memory efficient. @@ -232,8 +237,8 @@ You can also use ``construct`` to compute a weighted rolling sum: .. ipython:: python - weight = xr.DataArray([0.25, 0.5, 0.25], dims=['window']) - arr.rolling(y=3).construct('window').dot(weight) + weight = xr.DataArray([0.25, 0.5, 0.25], dims=["window"]) + arr.rolling(y=3).construct("window").dot(weight) .. note:: numpy's Nan-aggregation functions such as ``nansum`` copy the original array. @@ -254,52 +259,52 @@ support weighted ``sum`` and weighted ``mean``. .. ipython:: python - coords = dict(month=('month', [1, 2, 3])) + coords = dict(month=("month", [1, 2, 3])) - prec = xr.DataArray([1.1, 1.0, 0.9], dims=('month', ), coords=coords) - weights = xr.DataArray([31, 28, 31], dims=('month', ), coords=coords) + prec = xr.DataArray([1.1, 1.0, 0.9], dims=("month",), coords=coords) + weights = xr.DataArray([31, 28, 31], dims=("month",), coords=coords) Create a weighted object: .. ipython:: python - weighted_prec = prec.weighted(weights) - weighted_prec + weighted_prec = prec.weighted(weights) + weighted_prec Calculate the weighted sum: .. ipython:: python - weighted_prec.sum() + weighted_prec.sum() Calculate the weighted mean: .. ipython:: python - weighted_prec.mean(dim="month") + weighted_prec.mean(dim="month") The weighted sum corresponds to: .. ipython:: python - weighted_sum = (prec * weights).sum() - weighted_sum + weighted_sum = (prec * weights).sum() + weighted_sum and the weighted mean to: .. ipython:: python - weighted_mean = weighted_sum / weights.sum() - weighted_mean + weighted_mean = weighted_sum / weights.sum() + weighted_mean However, the functions also take missing values in the data into account: .. ipython:: python - data = xr.DataArray([np.NaN, 2, 4]) - weights = xr.DataArray([8, 1, 1]) + data = xr.DataArray([np.NaN, 2, 4]) + weights = xr.DataArray([8, 1, 1]) - data.weighted(weights).mean() + data.weighted(weights).mean() Using ``(data * weights).sum() / weights.sum()`` would (incorrectly) result in 0.6. @@ -309,16 +314,16 @@ If the weights add up to to 0, ``sum`` returns 0: .. ipython:: python - data = xr.DataArray([1.0, 1.0]) - weights = xr.DataArray([-1.0, 1.0]) + data = xr.DataArray([1.0, 1.0]) + weights = xr.DataArray([-1.0, 1.0]) - data.weighted(weights).sum() + data.weighted(weights).sum() and ``mean`` returns ``NaN``: .. ipython:: python - data.weighted(weights).mean() + data.weighted(weights).mean() .. note:: @@ -336,18 +341,21 @@ methods. This supports the block aggregation along multiple dimensions, .. ipython:: python - x = np.linspace(0, 10, 300) - t = pd.date_range('15/12/1999', periods=364) - da = xr.DataArray(np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), - dims=['time', 'x'], coords={'time': t, 'x': x}) - da + x = np.linspace(0, 10, 300) + t = pd.date_range("15/12/1999", periods=364) + da = xr.DataArray( + np.sin(x) * np.cos(np.linspace(0, 1, 364)[:, np.newaxis]), + dims=["time", "x"], + coords={"time": t, "x": x}, + ) + da In order to take a block mean for every 7 days along ``time`` dimension and every 2 points along ``x`` dimension, .. 
ipython:: python - da.coarsen(time=7, x=2).mean() + da.coarsen(time=7, x=2).mean() :py:meth:`~xarray.DataArray.coarsen` raises an ``ValueError`` if the data length is not a multiple of the corresponding window size. @@ -356,14 +364,14 @@ the excess entries or padding ``nan`` to insufficient entries, .. ipython:: python - da.coarsen(time=30, x=2, boundary='trim').mean() + da.coarsen(time=30, x=2, boundary="trim").mean() If you want to apply a specific function to coordinate, you can pass the function or method name to ``coord_func`` option, .. ipython:: python - da.coarsen(time=7, x=2, coord_func={'time': 'min'}).mean() + da.coarsen(time=7, x=2, coord_func={"time": "min"}).mean() .. _compute.using_coordinates: @@ -377,24 +385,25 @@ central finite differences using their coordinates, .. ipython:: python - a = xr.DataArray([0, 1, 2, 3], dims=['x'], coords=[[0.1, 0.11, 0.2, 0.3]]) + a = xr.DataArray([0, 1, 2, 3], dims=["x"], coords=[[0.1, 0.11, 0.2, 0.3]]) a - a.differentiate('x') + a.differentiate("x") This method can be used also for multidimensional arrays, .. ipython:: python - a = xr.DataArray(np.arange(8).reshape(4, 2), dims=['x', 'y'], - coords={'x': [0.1, 0.11, 0.2, 0.3]}) - a.differentiate('x') + a = xr.DataArray( + np.arange(8).reshape(4, 2), dims=["x", "y"], coords={"x": [0.1, 0.11, 0.2, 0.3]} + ) + a.differentiate("x") :py:meth:`~xarray.DataArray.integrate` computes integration based on trapezoidal rule using their coordinates, .. ipython:: python - a.integrate('x') + a.integrate("x") .. note:: These methods are limited to simple cartesian geometry. Differentiation @@ -412,9 +421,9 @@ best fitting coefficients along a given dimension and for a given order, .. ipython:: python - x = xr.DataArray(np.arange(10), dims=['x'], name='x') - a = xr.DataArray(3 + 4 * x, dims=['x'], coords={'x': x}) - out = a.polyfit(dim='x', deg=1, full=True) + x = xr.DataArray(np.arange(10), dims=["x"], name="x") + a = xr.DataArray(3 + 4 * x, dims=["x"], coords={"x": x}) + out = a.polyfit(dim="x", deg=1, full=True) out The method outputs a dataset containing the coefficients (and more if `full=True`). @@ -443,9 +452,9 @@ arrays with different sizes aligned along different dimensions: .. ipython:: python - a = xr.DataArray([1, 2], [('x', ['a', 'b'])]) + a = xr.DataArray([1, 2], [("x", ["a", "b"])]) a - b = xr.DataArray([-1, -2, -3], [('y', [10, 20, 30])]) + b = xr.DataArray([-1, -2, -3], [("y", [10, 20, 30])]) b With xarray, we can apply binary mathematical operations to these arrays, and @@ -460,7 +469,7 @@ appeared: .. ipython:: python - c = xr.DataArray(np.arange(6).reshape(3, 2), [b['y'], a['x']]) + c = xr.DataArray(np.arange(6).reshape(3, 2), [b["y"], a["x"]]) c a + c @@ -494,7 +503,7 @@ operations. The default result of a binary operation is by the *intersection* .. ipython:: python - arr = xr.DataArray(np.arange(3), [('x', range(3))]) + arr = xr.DataArray(np.arange(3), [("x", range(3))]) arr + arr[:-1] If coordinate values for a dimension are missing on either argument, all @@ -503,7 +512,7 @@ matching dimensions must have the same size: .. ipython:: :verbatim: - In [1]: arr + xr.DataArray([1, 2], dims='x') + In [1]: arr + xr.DataArray([1, 2], dims="x") ValueError: arguments without labels along dimension 'x' cannot be aligned because they have different dimension size(s) {2} than the size of the aligned dimension labels: 3 @@ -562,16 +571,20 @@ variables: .. 
ipython:: python - ds = xr.Dataset({'x_and_y': (('x', 'y'), np.random.randn(3, 5)), - 'x_only': ('x', np.random.randn(3))}, - coords=arr.coords) + ds = xr.Dataset( + { + "x_and_y": (("x", "y"), np.random.randn(3, 5)), + "x_only": ("x", np.random.randn(3)), + }, + coords=arr.coords, + ) ds > 0 Datasets support most of the same methods found on data arrays: .. ipython:: python - ds.mean(dim='x') + ds.mean(dim="x") abs(ds) Datasets also support NumPy ufuncs (requires NumPy v1.13 or newer), or @@ -594,7 +607,7 @@ Arithmetic between two datasets matches data variables of the same name: .. ipython:: python - ds2 = xr.Dataset({'x_and_y': 0, 'x_only': 100}) + ds2 = xr.Dataset({"x_and_y": 0, "x_only": 100}) ds - ds2 Similarly to index based alignment, the result has the intersection of all @@ -638,7 +651,7 @@ any additional arguments: .. ipython:: python squared_error = lambda x, y: (x - y) ** 2 - arr1 = xr.DataArray([0, 1, 2, 3], dims='x') + arr1 = xr.DataArray([0, 1, 2, 3], dims="x") xr.apply_ufunc(squared_error, arr1, 1) For using more complex operations that consider some array values collectively, @@ -658,21 +671,21 @@ to set ``axis=-1``. As an example, here is how we would wrap .. code-block:: python def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - :suppress: + :suppress: def vector_norm(x, dim, ord=None): - return xr.apply_ufunc(np.linalg.norm, x, - input_core_dims=[[dim]], - kwargs={'ord': ord, 'axis': -1}) + return xr.apply_ufunc( + np.linalg.norm, x, input_core_dims=[[dim]], kwargs={"ord": ord, "axis": -1} + ) .. ipython:: python - vector_norm(arr1, dim='x') + vector_norm(arr1, dim="x") Because ``apply_ufunc`` follows a standard convention for ufuncs, it plays nicely with tools for building vectorized functions, like diff --git a/doc/contributing.rst b/doc/contributing.rst index f581bcd9741..51dba2bb0cc 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -261,13 +261,13 @@ Some other important things to know about the docs: .. ipython:: python x = 2 - x**3 + x ** 3 will be rendered as:: In [1]: x = 2 - In [2]: x**3 + In [2]: x ** 3 Out[2]: 8 Almost all code examples in the docs are run (and the output saved) during the @@ -467,7 +467,7 @@ typically find tests wrapped in a class. .. code-block:: python class TestReallyCoolFeature: - .... + ... Going forward, we are moving to a more *functional* style using the `pytest `__ framework, which offers a richer @@ -477,7 +477,7 @@ writing test classes, we will write test functions like this: .. code-block:: python def test_really_cool_feature(): - .... + ... 
Using ``pytest`` ~~~~~~~~~~~~~~~~ @@ -508,17 +508,23 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place from xarray.testing import assert_equal - @pytest.mark.parametrize('dtype', ['int8', 'int16', 'int32', 'int64']) + @pytest.mark.parametrize("dtype", ["int8", "int16", "int32", "int64"]) def test_dtypes(dtype): assert str(np.dtype(dtype)) == dtype - @pytest.mark.parametrize('dtype', ['float32', - pytest.param('int16', marks=pytest.mark.skip), - pytest.param('int32', marks=pytest.mark.xfail( - reason='to show how it works'))]) + @pytest.mark.parametrize( + "dtype", + [ + "float32", + pytest.param("int16", marks=pytest.mark.skip), + pytest.param( + "int32", marks=pytest.mark.xfail(reason="to show how it works") + ), + ], + ) def test_mark(dtype): - assert str(np.dtype(dtype)) == 'float32' + assert str(np.dtype(dtype)) == "float32" @pytest.fixture @@ -526,7 +532,7 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place return xr.DataArray([1, 2, 3]) - @pytest.fixture(params=['int8', 'int16', 'int32', 'int64']) + @pytest.fixture(params=["int8", "int16", "int32", "int64"]) def dtype(request): return request.param diff --git a/doc/dask.rst b/doc/dask.rst index 07b3939af6e..2248de9c0d8 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -56,19 +56,26 @@ argument to :py:func:`~xarray.open_dataset` or using the import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) np.set_printoptions(precision=3, linewidth=100, threshold=100, edgeitems=3) - ds = xr.Dataset({'temperature': (('time', 'latitude', 'longitude'), - np.random.randn(30, 180, 180)), - 'time': pd.date_range('2015-01-01', periods=30), - 'longitude': np.arange(180), - 'latitude': np.arange(89.5, -90.5, -1)}) - ds.to_netcdf('example-data.nc') + ds = xr.Dataset( + { + "temperature": ( + ("time", "latitude", "longitude"), + np.random.randn(30, 180, 180), + ), + "time": pd.date_range("2015-01-01", periods=30), + "longitude": np.arange(180), + "latitude": np.arange(89.5, -90.5, -1), + } + ) + ds.to_netcdf("example-data.nc") .. ipython:: python - ds = xr.open_dataset('example-data.nc', chunks={'time': 10}) + ds = xr.open_dataset("example-data.nc", chunks={"time": 10}) ds In this example ``latitude`` and ``longitude`` do not appear in the ``chunks`` @@ -106,7 +113,7 @@ usual way. .. ipython:: python - ds.to_netcdf('manipulated-example-data.nc') + ds.to_netcdf("manipulated-example-data.nc") By setting the ``compute`` argument to ``False``, :py:meth:`~xarray.Dataset.to_netcdf` will return a ``dask.delayed`` object that can be computed later. @@ -114,8 +121,9 @@ will return a ``dask.delayed`` object that can be computed later. .. ipython:: python from dask.diagnostics import ProgressBar + # or distributed.progress when using the distributed scheduler - delayed_obj = ds.to_netcdf('manipulated-example-data.nc', compute=False) + delayed_obj = ds.to_netcdf("manipulated-example-data.nc", compute=False) with ProgressBar(): results = delayed_obj.compute() @@ -141,8 +149,9 @@ Dask DataFrames do not support multi-indexes so the coordinate variables from th :suppress: import os - os.remove('example-data.nc') - os.remove('manipulated-example-data.nc') + + os.remove("example-data.nc") + os.remove("manipulated-example-data.nc") Using Dask with xarray ---------------------- @@ -199,7 +208,7 @@ Dask arrays using the :py:meth:`~xarray.Dataset.persist` method: .. 
ipython:: python - ds = ds.persist() + ds = ds.persist() :py:meth:`~xarray.Dataset.persist` is particularly useful when using a distributed cluster because the data will be loaded into distributed memory @@ -224,11 +233,11 @@ sizes of Dask arrays is done with the :py:meth:`~xarray.Dataset.chunk` method: .. ipython:: python :suppress: - ds = ds.chunk({'time': 10}) + ds = ds.chunk({"time": 10}) .. ipython:: python - rechunked = ds.chunk({'latitude': 100, 'longitude': 100}) + rechunked = ds.chunk({"latitude": 100, "longitude": 100}) You can view the size of existing chunks on an array by viewing the :py:attr:`~xarray.Dataset.chunks` attribute: @@ -256,6 +265,7 @@ lazy Dask arrays, in the :ref:`xarray.ufuncs ` module: .. ipython:: python import xarray.ufuncs as xu + xu.sin(rechunked) To access Dask arrays directly, use the new @@ -302,24 +312,32 @@ we use to calculate `Spearman's rank-correlation coefficient ` and @@ -453,15 +470,15 @@ dataset variables: .. ipython:: python - ds.rename({'temperature': 'temp', 'precipitation': 'precip'}) + ds.rename({"temperature": "temp", "precipitation": "precip"}) The related :py:meth:`~xarray.Dataset.swap_dims` method allows you do to swap dimension and non-dimension variables: .. ipython:: python - ds.coords['day'] = ('time', [6, 7, 8]) - ds.swap_dims({'time': 'day'}) + ds.coords["day"] = ("time", [6, 7, 8]) + ds.swap_dims({"time": "day"}) .. _coordinates: @@ -519,8 +536,8 @@ To convert back and forth between data and coordinates, you can use the .. ipython:: python ds.reset_coords() - ds.set_coords(['temperature', 'precipitation']) - ds['temperature'].reset_coords(drop=True) + ds.set_coords(["temperature", "precipitation"]) + ds["temperature"].reset_coords(drop=True) Notice that these operations skip coordinates with names given by dimensions, as used for indexing. This mostly because we are not entirely sure how to @@ -544,7 +561,7 @@ logic used for merging coordinates in arithmetic operations .. ipython:: python - alt = xr.Dataset(coords={'z': [10], 'lat': 0, 'lon': 0}) + alt = xr.Dataset(coords={"z": [10], "lat": 0, "lon": 0}) ds.coords.merge(alt.coords) The ``coords.merge`` method may be useful if you want to implement your own @@ -560,7 +577,7 @@ To convert a coordinate (or any ``DataArray``) into an actual .. ipython:: python - ds['time'].to_index() + ds["time"].to_index() A useful shortcut is the ``indexes`` property (on both ``DataArray`` and ``Dataset``), which lazily constructs a dictionary whose keys are given by each @@ -577,9 +594,10 @@ Xarray supports labeling coordinate values with a :py:class:`pandas.MultiIndex`: .. ipython:: python - midx = pd.MultiIndex.from_arrays([['R', 'R', 'V', 'V'], [.1, .2, .7, .9]], - names=('band', 'wn')) - mda = xr.DataArray(np.random.rand(4), coords={'spec': midx}, dims='spec') + midx = pd.MultiIndex.from_arrays( + [["R", "R", "V", "V"], [0.1, 0.2, 0.7, 0.9]], names=("band", "wn") + ) + mda = xr.DataArray(np.random.rand(4), coords={"spec": midx}, dims="spec") mda For convenience multi-index levels are directly accessible as "virtual" or @@ -587,8 +605,8 @@ For convenience multi-index levels are directly accessible as "virtual" or .. ipython:: python - mda['band'] - mda.wn + mda["band"] + mda.wn Indexing with multi-index levels is also possible using the ``sel`` method (see :ref:`multi-level indexing`). diff --git a/doc/faq.rst b/doc/faq.rst index 576cec5c2b1..a2b8be47e06 100644 --- a/doc/faq.rst +++ b/doc/faq.rst @@ -4,11 +4,12 @@ Frequently Asked Questions ========================== .. 
ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) @@ -103,21 +104,21 @@ code fragment .. ipython:: python arr = xr.DataArray([1, 2, 3]) - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}) does not yield the pandas DataFrame we expected. We need to specify the type conversion ourselves: .. ipython:: python - pd.Series({'x': arr[0], 'mean': arr.mean(), 'std': arr.std()}, dtype=float) + pd.Series({"x": arr[0], "mean": arr.mean(), "std": arr.std()}, dtype=float) Alternatively, we could use the ``item`` method or the ``float`` constructor to convert values one at a time .. ipython:: python - pd.Series({'x': arr[0].item(), 'mean': float(arr.mean())}) + pd.Series({"x": arr[0].item(), "mean": float(arr.mean())}) .. _approach to metadata: diff --git a/doc/groupby.rst b/doc/groupby.rst index 223185bd0d5..c72a26c45ea 100644 --- a/doc/groupby.rst +++ b/doc/groupby.rst @@ -26,11 +26,12 @@ Split Let's create a simple example dataset: .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. ipython:: python @@ -47,20 +48,20 @@ use a DataArray directly), we get back a ``GroupBy`` object: .. ipython:: python - ds.groupby('letters') + ds.groupby("letters") This object works very similarly to a pandas GroupBy object. You can view the group indices with the ``groups`` attribute: .. ipython:: python - ds.groupby('letters').groups + ds.groupby("letters").groups You can also iterate over groups in ``(label, group)`` pairs: .. ipython:: python - list(ds.groupby('letters')) + list(ds.groupby("letters")) Just like in pandas, creating a GroupBy object is cheap: it does not actually split the data until you access particular values. @@ -75,8 +76,8 @@ a customized coordinate, but xarray facilitates this via the .. ipython:: python - x_bins = [0,25,50] - ds.groupby_bins('x', x_bins).groups + x_bins = [0, 25, 50] + ds.groupby_bins("x", x_bins).groups The binning is implemented via :func:`pandas.cut`, whose documentation details how the bins are assigned. As seen in the example above, by default, the bins are @@ -86,8 +87,8 @@ choose `float` labels which identify the bin centers: .. ipython:: python - x_bin_labels = [12.5,37.5] - ds.groupby_bins('x', x_bins, labels=x_bin_labels).groups + x_bin_labels = [12.5, 37.5] + ds.groupby_bins("x", x_bins, labels=x_bin_labels).groups Apply @@ -102,7 +103,8 @@ concatenated back together along the group axis: def standardize(x): return (x - x.mean()) / x.std() - arr.groupby('letters').map(standardize) + + arr.groupby("letters").map(standardize) GroupBy objects also have a :py:meth:`~xarray.core.groupby.DatasetGroupBy.reduce` method and methods like :py:meth:`~xarray.core.groupby.DatasetGroupBy.mean` as shortcuts for applying an @@ -110,14 +112,14 @@ aggregation function: .. ipython:: python - arr.groupby('letters').mean(dim='x') + arr.groupby("letters").mean(dim="x") Using a groupby is thus also a convenient shortcut for aggregating over all dimensions *other than* the provided one: .. ipython:: python - ds.groupby('x').std(...) + ds.groupby("x").std(...) .. note:: @@ -134,7 +136,7 @@ values for group along the grouped dimension: .. ipython:: python - ds.groupby('letters').first(...) + ds.groupby("letters").first(...) By default, they skip missing values (control this with ``skipna``). @@ -149,9 +151,9 @@ coordinates. For example: .. 
ipython:: python - alt = arr.groupby('letters').mean(...) + alt = arr.groupby("letters").mean(...) alt - ds.groupby('letters') - alt + ds.groupby("letters") - alt This last line is roughly equivalent to the following:: @@ -169,11 +171,11 @@ the ``squeeze`` parameter: .. ipython:: python - next(iter(arr.groupby('x'))) + next(iter(arr.groupby("x"))) .. ipython:: python - next(iter(arr.groupby('x', squeeze=False))) + next(iter(arr.groupby("x", squeeze=False))) Although xarray will attempt to automatically :py:attr:`~xarray.DataArray.transpose` dimensions back into their original order @@ -197,13 +199,17 @@ __ http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_two_dimen .. ipython:: python - da = xr.DataArray([[0,1],[2,3]], - coords={'lon': (['ny','nx'], [[30,40],[40,50]] ), - 'lat': (['ny','nx'], [[10,10],[20,20]] ),}, - dims=['ny','nx']) + da = xr.DataArray( + [[0, 1], [2, 3]], + coords={ + "lon": (["ny", "nx"], [[30, 40], [40, 50]]), + "lat": (["ny", "nx"], [[10, 10], [20, 20]]), + }, + dims=["ny", "nx"], + ) da - da.groupby('lon').sum(...) - da.groupby('lon').map(lambda x: x - x.mean(), shortcut=False) + da.groupby("lon").sum(...) + da.groupby("lon").map(lambda x: x - x.mean(), shortcut=False) Because multidimensional groups have the ability to generate a very large number of bins, coarse-binning via :py:meth:`~xarray.Dataset.groupby_bins` @@ -211,7 +217,7 @@ may be desirable: .. ipython:: python - da.groupby_bins('lon', [0,45,50]).sum() + da.groupby_bins("lon", [0, 45, 50]).sum() These methods group by `lon` values. It is also possible to groupby each cell in a grid, regardless of value, by stacking multiple dimensions, @@ -219,5 +225,5 @@ applying your function, and then unstacking the result: .. ipython:: python - stacked = da.stack(gridcell=['ny', 'nx']) - stacked.groupby('gridcell').sum(...).unstack('gridcell') + stacked = da.stack(gridcell=["ny", "nx"]) + stacked.groupby("gridcell").sum(...).unstack("gridcell") \ No newline at end of file diff --git a/doc/indexing.rst b/doc/indexing.rst index cfbb84a8343..af8e44fb80b 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -4,11 +4,12 @@ Indexing and selecting data =========================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers extremely flexible indexing routines that combine the best @@ -60,9 +61,13 @@ DataArray: .. ipython:: python - da = xr.DataArray(np.random.rand(4, 3), - [('time', pd.date_range('2000-01-01', periods=4)), - ('space', ['IA', 'IL', 'IN'])]) + da = xr.DataArray( + np.random.rand(4, 3), + [ + ("time", pd.date_range("2000-01-01", periods=4)), + ("space", ["IA", "IL", "IN"]), + ], + ) da[:2] da[0, 0] da[:, [2, 1]] @@ -81,7 +86,7 @@ fast. To do label based indexing, use the :py:attr:`~xarray.DataArray.loc` attri .. ipython:: python - da.loc['2000-01-01':'2000-01-02', 'IA'] + da.loc["2000-01-01":"2000-01-02", "IA"] In this example, the selected is a subpart of the array in the range '2000-01-01':'2000-01-02' along the first coordinate `time` @@ -98,7 +103,7 @@ Setting values with label based indexing is also supported: .. ipython:: python - da.loc['2000-01-01', ['IL', 'IN']] = -10 + da.loc["2000-01-01", ["IL", "IN"]] = -10 da @@ -117,7 +122,7 @@ use them explicitly to slice data. There are two ways to do this: da[dict(space=0, time=slice(None, 2))] # index by dimension coordinate labels - da.loc[dict(time=slice('2000-01-01', '2000-01-02'))] + da.loc[dict(time=slice("2000-01-01", "2000-01-02"))] 2. 
Use the :py:meth:`~xarray.DataArray.sel` and :py:meth:`~xarray.DataArray.isel` convenience methods: @@ -128,7 +133,7 @@ use them explicitly to slice data. There are two ways to do this: da.isel(space=0, time=slice(None, 2)) # index by dimension coordinate labels - da.sel(time=slice('2000-01-01', '2000-01-02')) + da.sel(time=slice("2000-01-01", "2000-01-02")) The arguments to these methods can be any objects that could index the array along the dimension given by the keyword, e.g., labels for an individual value, @@ -156,16 +161,16 @@ enabling nearest neighbor (inexact) lookups by use of the methods ``'pad'``, .. ipython:: python - da = xr.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - da.sel(x=[1.1, 1.9], method='nearest') - da.sel(x=0.1, method='backfill') - da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + da = xr.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + da.sel(x=[1.1, 1.9], method="nearest") + da.sel(x=0.1, method="backfill") + da.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") Tolerance limits the maximum distance for valid matches with an inexact lookup: .. ipython:: python - da.reindex(x=[1.1, 1.5], method='nearest', tolerance=0.2) + da.reindex(x=[1.1, 1.5], method="nearest", tolerance=0.2) The method parameter is not yet supported if any of the arguments to ``.sel()`` is a ``slice`` object: @@ -173,7 +178,7 @@ to ``.sel()`` is a ``slice`` object: .. ipython:: :verbatim: - In [1]: da.sel(x=slice(1, 3), method='nearest') + In [1]: da.sel(x=slice(1, 3), method="nearest") NotImplementedError However, you don't need to use ``method`` to do inexact slicing. Slicing @@ -182,15 +187,15 @@ labels are monotonic increasing: .. ipython:: python - da.sel(x=slice(0.9, 3.1)) + da.sel(x=slice(0.9, 3.1)) Indexing axes with monotonic decreasing labels also works, as long as the ``slice`` or ``.loc`` arguments are also decreasing: .. ipython:: python - reversed_da = da[::-1] - reversed_da.loc[3.1:0.9] + reversed_da = da[::-1] + reversed_da.loc[3.1:0.9] .. note:: @@ -227,7 +232,7 @@ arrays). However, you can do normal indexing with dimension names: .. ipython:: python ds[dict(space=[0], time=[0])] - ds.loc[dict(time='2000-01-01')] + ds.loc[dict(time="2000-01-01")] Using indexing to *assign* values to a subset of dataset (e.g., ``ds[dict(space=0)] = 1``) is not yet supported. @@ -240,7 +245,7 @@ index labels along a dimension dropped: .. ipython:: python - ds.drop_sel(space=['IN', 'IL']) + ds.drop_sel(space=["IN", "IL"]) ``drop_sel`` is both a ``Dataset`` and ``DataArray`` method. @@ -249,7 +254,7 @@ Any variables with these dimensions are also dropped: .. ipython:: python - ds.drop_dims('time') + ds.drop_dims("time") .. _masking with where: @@ -263,7 +268,7 @@ xarray, use :py:meth:`~xarray.DataArray.where`: .. ipython:: python - da = xr.DataArray(np.arange(16).reshape(4, 4), dims=['x', 'y']) + da = xr.DataArray(np.arange(16).reshape(4, 4), dims=["x", "y"]) da.where(da.x + da.y < 4) This is particularly useful for ragged indexing of multi-dimensional data, @@ -296,7 +301,7 @@ multiple values, use :py:meth:`~xarray.DataArray.isin`: .. ipython:: python - da = xr.DataArray([1, 2, 3, 4, 5], dims=['x']) + da = xr.DataArray([1, 2, 3, 4, 5], dims=["x"]) da.isin([2, 4]) :py:meth:`~xarray.DataArray.isin` works particularly well with @@ -305,7 +310,7 @@ already labels of an array: .. 
ipython:: python - lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=['x']) + lookup = xr.DataArray([-1, -2, -3, -4, -5], dims=["x"]) da.where(lookup.isin([-2, -4]), drop=True) However, some caution is in order: when done repeatedly, this type of indexing @@ -328,7 +333,6 @@ MATLAB, or after using the :py:func:`numpy.ix_` helper: .. ipython:: python - da = xr.DataArray( np.arange(12).reshape((3, 4)), dims=["x", "y"], @@ -344,8 +348,8 @@ dimensions: .. ipython:: python - ind_x = xr.DataArray([0, 1], dims=['x']) - ind_y = xr.DataArray([0, 1], dims=['y']) + ind_x = xr.DataArray([0, 1], dims=["x"]) + ind_y = xr.DataArray([0, 1], dims=["y"]) da[ind_x, ind_y] # orthogonal indexing da[ind_x, ind_x] # vectorized indexing @@ -364,7 +368,7 @@ indexers' dimension: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da[ind] Similar to how NumPy's `advanced indexing`_ works, vectorized @@ -378,18 +382,18 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: .. ipython:: python - ind = xr.DataArray([[0, 1], [0, 1]], dims=['a', 'b']) + ind = xr.DataArray([[0, 1], [0, 1]], dims=["a", "b"]) da.isel(y=ind) # same as da[:, ind] - ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) + ind = xr.DataArray([["a", "b"], ["b", "a"]], dims=["a", "b"]) da.loc[:, ind] # same as da.sel(y=ind) These methods may also be applied to ``Dataset`` objects .. ipython:: python - ds = da.to_dataset(name='bar') - ds.isel(x=xr.DataArray([0, 1, 2], dims=['points'])) + ds = da.to_dataset(name="bar") + ds.isel(x=xr.DataArray([0, 1, 2], dims=["points"])) .. tip:: @@ -476,8 +480,8 @@ Like ``numpy.ndarray``, value assignment sometimes works differently from what o .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) - ind = xr.DataArray([0, 0, 0], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) + ind = xr.DataArray([0, 0, 0], dims=["x"]) da[ind] -= 1 da @@ -511,7 +515,7 @@ __ https://docs.scipy.org/doc/numpy/user/basics.indexing.html#assigning-values-t .. ipython:: python - da = xr.DataArray([0, 1, 2, 3], dims=['x']) + da = xr.DataArray([0, 1, 2, 3], dims=["x"]) # DO NOT do this da.isel(x=[0, 1, 2])[1] = -1 da @@ -581,15 +585,15 @@ To reindex a particular dimension, use :py:meth:`~xarray.DataArray.reindex`: .. ipython:: python - da.reindex(space=['IA', 'CA']) + da.reindex(space=["IA", "CA"]) The :py:meth:`~xarray.DataArray.reindex_like` method is a useful shortcut. To demonstrate, we will make a subset DataArray with new values: .. ipython:: python - foo = da.rename('foo') - baz = (10 * da[:2, :2]).rename('baz') + foo = da.rename("foo") + baz = (10 * da[:2, :2]).rename("baz") baz Reindexing ``foo`` with ``baz`` selects out the first two values along each @@ -611,8 +615,8 @@ The :py:func:`~xarray.align` function lets us perform more flexible database-lik .. 
ipython:: python - xr.align(foo, baz, join='inner') - xr.align(foo, baz, join='outer') + xr.align(foo, baz, join="inner") + xr.align(foo, baz, join="outer") Both ``reindex_like`` and ``align`` work interchangeably between :py:class:`~xarray.DataArray` and :py:class:`~xarray.Dataset` objects, and with any number of matching dimension names: @@ -621,7 +625,7 @@ Both ``reindex_like`` and ``align`` work interchangeably between ds ds.reindex_like(baz) - other = xr.DataArray(['a', 'b', 'c'], dims='other') + other = xr.DataArray(["a", "b", "c"], dims="other") # this is a no-op, because there are no shared dimension names ds.reindex_like(other) @@ -636,7 +640,7 @@ integer-based indexing as a fallback for dimensions without a coordinate label: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da.sel(x=[0, -1]) Alignment between xarray objects where one or both do not have coordinate labels @@ -675,9 +679,9 @@ labels: .. ipython:: python - da = xr.DataArray([1, 2, 3], dims='x') + da = xr.DataArray([1, 2, 3], dims="x") da - da.get_index('x') + da.get_index("x") .. _copies_vs_views: @@ -721,7 +725,6 @@ pandas: .. ipython:: python - midx = pd.MultiIndex.from_product([list("abc"), [0, 1]], names=("one", "two")) mda = xr.DataArray(np.random.rand(6, 3), [("x", midx), ("y", range(3))]) mda @@ -732,20 +735,20 @@ a slice of tuples: .. ipython:: python - mda.sel(x=[('a', 0), ('b', 1)]) + mda.sel(x=[("a", 0), ("b", 1)]) Additionally, xarray supports dictionaries: .. ipython:: python - mda.sel(x={'one': 'a', 'two': 0}) + mda.sel(x={"one": "a", "two": 0}) For convenience, ``sel`` also accepts multi-index levels directly as keyword arguments: .. ipython:: python - mda.sel(one='a', two=0) + mda.sel(one="a", two=0) Note that using ``sel`` it is not possible to mix a dimension indexer with level indexers for that dimension @@ -757,7 +760,7 @@ multi-index is reduced to a single index. .. ipython:: python - mda.loc[{'one': 'a'}, ...] + mda.loc[{"one": "a"}, ...] Unlike pandas, xarray does not guess whether you provide index levels or dimensions when using ``loc`` in some ambiguous cases. For example, for diff --git a/doc/internals.rst b/doc/internals.rst index a4870f2316a..c23aab8c5d7 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -46,11 +46,12 @@ Extending xarray ---------------- .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray is designed as a general purpose library, and hence tries to avoid @@ -87,11 +88,12 @@ defined that returns an instance of your class: .. code-block:: python - class Dataset: - ... - @property - def geo(self) - return GeoAccessor(self) + class Dataset: + ... + + @property + def geo(self): + return GeoAccessor(self) However, using the register accessor decorators is preferable to simply adding your own ad-hoc property (i.e., ``Dataset.geo = property(...)``), for several @@ -116,14 +118,13 @@ reasons: Back in an interactive IPython session, we can use these properties: .. ipython:: python - :suppress: + :suppress: - exec(open("examples/_code/accessor_example.py").read()) + exec(open("examples/_code/accessor_example.py").read()) .. 
ipython:: python - ds = xr.Dataset({'longitude': np.linspace(0, 10), - 'latitude': np.linspace(0, 20)}) + ds = xr.Dataset({"longitude": np.linspace(0, 10), "latitude": np.linspace(0, 20)}) ds.geo.center ds.geo.plot() diff --git a/doc/interpolation.rst b/doc/interpolation.rst index 4cf39807e5a..c2922813e15 100644 --- a/doc/interpolation.rst +++ b/doc/interpolation.rst @@ -4,11 +4,12 @@ Interpolating data ================== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) xarray offers flexible interpolation routines, which have a similar interface @@ -27,9 +28,10 @@ indexing of a :py:class:`~xarray.DataArray`, .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) # label lookup da.sel(time=3) @@ -52,16 +54,17 @@ To interpolate data with a :py:doc:`numpy.datetime64 .. ipython:: python - da_dt64 = xr.DataArray([1, 3], - [('time', pd.date_range('1/1/2000', '1/3/2000', periods=2))]) - da_dt64.interp(time='2000-01-02') + da_dt64 = xr.DataArray( + [1, 3], [("time", pd.date_range("1/1/2000", "1/3/2000", periods=2))] + ) + da_dt64.interp(time="2000-01-02") The interpolated data can be merged into the original :py:class:`~xarray.DataArray` by specifying the time periods required. .. ipython:: python - da_dt64.interp(time=pd.date_range('1/1/2000', '1/3/2000', periods=3)) + da_dt64.interp(time=pd.date_range("1/1/2000", "1/3/2000", periods=3)) Interpolation of data indexed by a :py:class:`~xarray.CFTimeIndex` is also allowed. See :ref:`CFTimeIndex` for examples. @@ -108,9 +111,10 @@ different coordinates, .. ipython:: python - other = xr.DataArray(np.sin(0.4 * np.arange(9).reshape(3, 3)), - [('time', [0.9, 1.9, 2.9]), - ('space', [0.15, 0.25, 0.35])]) + other = xr.DataArray( + np.sin(0.4 * np.arange(9).reshape(3, 3)), + [("time", [0.9, 1.9, 2.9]), ("space", [0.15, 0.25, 0.35])], + ) it might be a good idea to first interpolate ``da`` so that it will stay on the same coordinates of ``other``, and then subtract it. @@ -118,9 +122,9 @@ same coordinates of ``other``, and then subtract it. .. ipython:: python - # interpolate da along other's coordinates - interpolated = da.interp_like(other) - interpolated + # interpolate da along other's coordinates + interpolated = da.interp_like(other) + interpolated It is now possible to safely compute the difference ``other - interpolated``. @@ -135,12 +139,15 @@ The interpolation method can be specified by the optional ``method`` argument. .. ipython:: python - da = xr.DataArray(np.sin(np.linspace(0, 2 * np.pi, 10)), dims='x', - coords={'x': np.linspace(0, 1, 10)}) + da = xr.DataArray( + np.sin(np.linspace(0, 2 * np.pi, 10)), + dims="x", + coords={"x": np.linspace(0, 1, 10)}, + ) - da.plot.line('o', label='original') - da.interp(x=np.linspace(0, 1, 100)).plot.line(label='linear (default)') - da.interp(x=np.linspace(0, 1, 100), method='cubic').plot.line(label='cubic') + da.plot.line("o", label="original") + da.interp(x=np.linspace(0, 1, 100)).plot.line(label="linear (default)") + da.interp(x=np.linspace(0, 1, 100), method="cubic").plot.line(label="cubic") @savefig interpolation_sample1.png width=4in plt.legend() @@ -149,15 +156,16 @@ Additional keyword arguments can be passed to scipy's functions. .. ipython:: python # fill 0 for the outside of the original coordinates. 
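    # (these kwargs are passed straight through to scipy.interpolate.interp1d;
    # without a fill_value, points outside the original 0-1 range come back as NaN)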
- da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 0.0}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": 0.0}) # 1-dimensional extrapolation - da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={'fill_value': 'extrapolate'}) + da.interp(x=np.linspace(-0.5, 1.5, 10), kwargs={"fill_value": "extrapolate"}) # multi-dimensional extrapolation - da = xr.DataArray(np.sin(0.3 * np.arange(12).reshape(4, 3)), - [('time', np.arange(4)), - ('space', [0.1, 0.2, 0.3])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(12).reshape(4, 3)), + [("time", np.arange(4)), ("space", [0.1, 0.2, 0.3])], + ) - da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={'fill_value': None}) + da.interp(time=4, space=np.linspace(-0.1, 0.5, 10), kwargs={"fill_value": None}) Advanced Interpolation @@ -181,17 +189,18 @@ For example: .. ipython:: python - da = xr.DataArray(np.sin(0.3 * np.arange(20).reshape(5, 4)), - [('x', np.arange(5)), - ('y', [0.1, 0.2, 0.3, 0.4])]) + da = xr.DataArray( + np.sin(0.3 * np.arange(20).reshape(5, 4)), + [("x", np.arange(5)), ("y", [0.1, 0.2, 0.3, 0.4])], + ) # advanced indexing - x = xr.DataArray([0, 2, 4], dims='z') - y = xr.DataArray([0.1, 0.2, 0.3], dims='z') + x = xr.DataArray([0, 2, 4], dims="z") + y = xr.DataArray([0.1, 0.2, 0.3], dims="z") da.sel(x=x, y=y) # advanced interpolation - x = xr.DataArray([0.5, 1.5, 2.5], dims='z') - y = xr.DataArray([0.15, 0.25, 0.35], dims='z') + x = xr.DataArray([0.5, 1.5, 2.5], dims="z") + y = xr.DataArray([0.15, 0.25, 0.35], dims="z") da.interp(x=x, y=y) where values on the original coordinates @@ -203,9 +212,8 @@ If you want to add a coordinate to the new dimension ``z``, you can supply .. ipython:: python - x = xr.DataArray([0.5, 1.5, 2.5], dims='z', coords={'z': ['a', 'b','c']}) - y = xr.DataArray([0.15, 0.25, 0.35], dims='z', - coords={'z': ['a', 'b','c']}) + x = xr.DataArray([0.5, 1.5, 2.5], dims="z", coords={"z": ["a", "b", "c"]}) + y = xr.DataArray([0.15, 0.25, 0.35], dims="z", coords={"z": ["a", "b", "c"]}) da.interp(x=x, y=y) For the details of the advanced indexing, @@ -224,19 +232,18 @@ while other methods such as ``cubic`` or ``quadratic`` return all NaN arrays. .. ipython:: python - da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims='x', - coords={'x': range(5)}) + da = xr.DataArray([0, 2, np.nan, 3, 3.25], dims="x", coords={"x": range(5)}) da.interp(x=[0.5, 1.5, 2.5]) - da.interp(x=[0.5, 1.5, 2.5], method='cubic') + da.interp(x=[0.5, 1.5, 2.5], method="cubic") To avoid this, you can drop NaN by :py:meth:`~xarray.DataArray.dropna`, and then make the interpolation .. ipython:: python - dropped = da.dropna('x') + dropped = da.dropna("x") dropped - dropped.interp(x=[0.5, 1.5, 2.5], method='cubic') + dropped.interp(x=[0.5, 1.5, 2.5], method="cubic") If NaNs are distributed randomly in your multidimensional array, dropping all the columns containing more than one NaNs by @@ -246,7 +253,7 @@ which is similar to :py:meth:`pandas.Series.interpolate`. .. ipython:: python - filled = da.interpolate_na(dim='x') + filled = da.interpolate_na(dim="x") filled This fills NaN by interpolating along the specified dimension. @@ -254,7 +261,7 @@ After filling NaNs, you can interpolate: .. ipython:: python - filled.interp(x=[0.5, 1.5, 2.5], method='cubic') + filled.interp(x=[0.5, 1.5, 2.5], method="cubic") For the details of :py:meth:`~xarray.DataArray.interpolate_na`, see :ref:`Missing values `. @@ -268,18 +275,18 @@ Let's see how :py:meth:`~xarray.DataArray.interp` works on real data. .. 
ipython:: python # Raw data - ds = xr.tutorial.open_dataset('air_temperature').isel(time=0) + ds = xr.tutorial.open_dataset("air_temperature").isel(time=0) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) - axes[0].set_title('Raw data') + axes[0].set_title("Raw data") # Interpolated data - new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims['lon'] * 4) - new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims['lat'] * 4) + new_lon = np.linspace(ds.lon[0], ds.lon[-1], ds.dims["lon"] * 4) + new_lat = np.linspace(ds.lat[0], ds.lat[-1], ds.dims["lat"] * 4) dsi = ds.interp(lat=new_lat, lon=new_lon) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample3.png width=8in - axes[1].set_title('Interpolated data') + axes[1].set_title("Interpolated data") Our advanced interpolation can be used to remap the data to the new coordinate. Consider the new coordinates x and z on the two dimensional plane. @@ -291,20 +298,23 @@ The remapping can be done as follows x = np.linspace(240, 300, 100) z = np.linspace(20, 70, 100) # relation between new and original coordinates - lat = xr.DataArray(z, dims=['z'], coords={'z': z}) - lon = xr.DataArray((x[:, np.newaxis]-270)/np.cos(z*np.pi/180)+270, - dims=['x', 'z'], coords={'x': x, 'z': z}) + lat = xr.DataArray(z, dims=["z"], coords={"z": z}) + lon = xr.DataArray( + (x[:, np.newaxis] - 270) / np.cos(z * np.pi / 180) + 270, + dims=["x", "z"], + coords={"x": x, "z": z}, + ) fig, axes = plt.subplots(ncols=2, figsize=(10, 4)) ds.air.plot(ax=axes[0]) # draw the new coordinate on the original coordinates. for idx in [0, 33, 66, 99]: - axes[0].plot(lon.isel(x=idx), lat, '--k') + axes[0].plot(lon.isel(x=idx), lat, "--k") for idx in [0, 33, 66, 99]: - axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), '--k') - axes[0].set_title('Raw data') + axes[0].plot(*xr.broadcast(lon.isel(z=idx), lat.isel(z=idx)), "--k") + axes[0].set_title("Raw data") dsi = ds.interp(lon=lon, lat=lat) dsi.air.plot(ax=axes[1]) @savefig interpolation_sample4.png width=8in - axes[1].set_title('Remapped data') + axes[1].set_title("Remapped data") \ No newline at end of file diff --git a/doc/io.rst b/doc/io.rst index 0c666099df8..738d8d2b7ab 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -9,11 +9,12 @@ simple :ref:`io.pickle` files to the more flexible :ref:`io.netcdf` format (recommended). .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) .. _io.netcdf: @@ -52,12 +53,16 @@ We can save a Dataset to disk using the .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) - ds.to_netcdf('saved_on_disk.nc') + ds.to_netcdf("saved_on_disk.nc") By default, the file is saved as netCDF4 (assuming netCDF4-Python is installed). You can control the format and engine used to write the file with @@ -76,7 +81,7 @@ We can load netCDF files to create a new Dataset using .. ipython:: python - ds_disk = xr.open_dataset('saved_on_disk.nc') + ds_disk = xr.open_dataset("saved_on_disk.nc") ds_disk Similarly, a DataArray can be saved to disk using the @@ -117,7 +122,7 @@ netCDF file. However, it's often cleaner to use a ``with`` statement: .. 
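
A related shortcut, sketched here on the assumption that a reasonably recent xarray
release is installed, is :py:func:`~xarray.load_dataset`: it opens a file, eagerly loads
it into memory and closes the underlying handle in a single call, so no explicit cleanup
is needed afterwards.

.. ipython:: python

    ds_eager = xr.load_dataset("saved_on_disk.nc")

When the file handle should stay open for lazy access, the ``with`` block keeps the
cleanup explicit:

..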
ipython:: python # this automatically closes the dataset after use - with xr.open_dataset('saved_on_disk.nc') as ds: + with xr.open_dataset("saved_on_disk.nc") as ds: print(ds.keys()) Although xarray provides reasonable support for incremental reads of files on @@ -171,7 +176,7 @@ You can view this encoding information (among others) in the .. ipython:: :verbatim: - In [1]: ds_disk['y'].encoding + In [1]: ds_disk["y"].encoding Out[1]: {'zlib': False, 'shuffle': False, @@ -469,7 +474,7 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :okwarning: # Writing complex valued data - da = xr.DataArray([1.+1.j, 2.+2.j, 3.+3.j]) + da = xr.DataArray([1.0 + 1.0j, 2.0 + 2.0j, 3.0 + 3.0j]) da.to_netcdf("complex.nc", engine="h5netcdf", invalid_netcdf=True) # Reading it back @@ -479,7 +484,8 @@ and currently raises a warning unless ``invalid_netcdf=True`` is set: :suppress: import os - os.remove('complex.nc') + + os.remove("complex.nc") .. warning:: @@ -499,9 +505,11 @@ installed xarray can convert a ``DataArray`` into a ``Cube`` using .. ipython:: python - da = xr.DataArray(np.random.rand(4, 5), dims=['x', 'y'], - coords=dict(x=[10, 20, 30, 40], - y=pd.date_range('2000-01-01', periods=5))) + da = xr.DataArray( + np.random.rand(4, 5), + dims=["x", "y"], + coords=dict(x=[10, 20, 30, 40], y=pd.date_range("2000-01-01", periods=5)), + ) cube = da.to_iris() cube @@ -548,8 +556,9 @@ __ http://iri.columbia.edu/ :verbatim: In [3]: remote_data = xr.open_dataset( - ...: 'http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods', - ...: decode_times=False) + ...: "http://iridl.ldeo.columbia.edu/SOURCES/.OSU/.PRISM/.monthly/dods", + ...: decode_times=False, + ...: ) In [4]: remote_data Out[4]: @@ -587,7 +596,7 @@ over the network until we look at particular values: .. ipython:: :verbatim: - In [4]: tmax = remote_data['tmax'][:500, ::3, ::3] + In [4]: tmax = remote_data["tmax"][:500, ::3, ::3] In [5]: tmax Out[5]: @@ -715,7 +724,8 @@ search indices or other automated data discovery tools. :suppress: import os - os.remove('saved_on_disk.nc') + + os.remove("saved_on_disk.nc") .. _io.rasterio: @@ -729,7 +739,7 @@ rasterio is installed. Here is an example of how to use .. ipython:: :verbatim: - In [7]: rio = xr.open_rasterio('RGB.byte.tif') + In [7]: rio = xr.open_rasterio("RGB.byte.tif") In [8]: rio Out[8]: @@ -769,7 +779,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [1]: import rioxarray - In [2]: rds = rioxarray.open_rasterio('RGB.byte.tif') + In [2]: rds = rioxarray.open_rasterio("RGB.byte.tif") In [3]: rds Out[3]: @@ -799,7 +809,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) - In [7]: rds4326.rio.to_raster('RGB.byte.4326.tif') + In [7]: rds4326.rio.to_raster("RGB.byte.4326.tif") .. _rasterio: https://rasterio.readthedocs.io/en/latest/ @@ -831,17 +841,21 @@ xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` meth To write to a local directory, we pass a path to a directory .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. 
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.rand(4, 5))}, - coords={'x': [10, 20, 30, 40], - 'y': pd.date_range('2000-01-01', periods=5), - 'z': ('x', list('abcd'))}) - ds.to_zarr('path/to/directory.zarr') + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.rand(4, 5))}, + coords={ + "x": [10, 20, 30, 40], + "y": pd.date_range("2000-01-01", periods=5), + "z": ("x", list("abcd")), + }, + ) + ds.to_zarr("path/to/directory.zarr") (The suffix ``.zarr`` is optional--just a reminder that a zarr store lives there.) If the directory does not exist, it will be created. If a zarr @@ -854,22 +868,30 @@ It is also possible to append to an existing store. For that, set can be omitted as it will internally be set to ``'a'``. .. ipython:: python - :suppress: + :suppress: ! rm -rf path/to/directory.zarr .. ipython:: python - ds1 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-01', periods=2)}) - ds1.to_zarr('path/to/directory.zarr') - ds2 = xr.Dataset({'foo': (('x', 'y', 't'), np.random.rand(4, 5, 2))}, - coords={'x': [10, 20, 30, 40], - 'y': [1,2,3,4,5], - 't': pd.date_range('2001-01-03', periods=2)}) - ds2.to_zarr('path/to/directory.zarr', append_dim='t') + ds1 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-01", periods=2), + }, + ) + ds1.to_zarr("path/to/directory.zarr") + ds2 = xr.Dataset( + {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, + coords={ + "x": [10, 20, 30, 40], + "y": [1, 2, 3, 4, 5], + "t": pd.date_range("2001-01-03", periods=2), + }, + ) + ds2.to_zarr("path/to/directory.zarr", append_dim="t") To store variable length strings use ``dtype=object``. @@ -878,7 +900,7 @@ To read back a zarr dataset that has been created this way, we use the .. ipython:: python - ds_zarr = xr.open_zarr('path/to/directory.zarr') + ds_zarr = xr.open_zarr("path/to/directory.zarr") ds_zarr Cloud Storage Buckets @@ -912,15 +934,16 @@ These options can be passed to the ``to_zarr`` method as variable encoding. For example: .. ipython:: python - :suppress: + :suppress: ! rm -rf foo.zarr .. ipython:: python import zarr - compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2) - ds.to_zarr('foo.zarr', encoding={'foo': {'compressor': compressor}}) + + compressor = zarr.Blosc(cname="zstd", clevel=3, shuffle=2) + ds.to_zarr("foo.zarr", encoding={"foo": {"compressor": compressor}}) .. note:: @@ -959,11 +982,12 @@ be done directly from zarr, as described in the .. _io.cfgrib: .. ipython:: python - :suppress: + :suppress: import shutil - shutil.rmtree('foo.zarr') - shutil.rmtree('path/to/directory.zarr') + + shutil.rmtree("foo.zarr") + shutil.rmtree("path/to/directory.zarr") GRIB format via cfgrib ---------------------- @@ -975,7 +999,7 @@ to :py:func:`open_dataset`: .. ipython:: :verbatim: - In [1]: ds_grib = xr.open_dataset('example.grib', engine='cfgrib') + In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib") We recommend installing ecCodes via conda:: diff --git a/doc/pandas.rst b/doc/pandas.rst index b0ec2a117dc..acf1d16b6ee 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -20,6 +20,7 @@ __ http://seaborn.pydata.org/ import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Hierarchical and tidy data @@ -47,10 +48,15 @@ To convert any dataset to a ``DataFrame`` in tidy form, use the .. 
ipython:: python - ds = xr.Dataset({'foo': (('x', 'y'), np.random.randn(2, 3))}, - coords={'x': [10, 20], 'y': ['a', 'b', 'c'], - 'along_x': ('x', np.random.randn(2)), - 'scalar': 123}) + ds = xr.Dataset( + {"foo": (("x", "y"), np.random.randn(2, 3))}, + coords={ + "x": [10, 20], + "y": ["a", "b", "c"], + "along_x": ("x", np.random.randn(2)), + "scalar": 123, + }, + ) ds df = ds.to_dataframe() df @@ -91,7 +97,7 @@ DataFrames: .. ipython:: python - s = ds['foo'].to_series() + s = ds["foo"].to_series() s # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) @@ -117,8 +123,9 @@ available in pandas (i.e., a 1D array is converted to a .. ipython:: python - arr = xr.DataArray(np.random.randn(2, 3), - coords=[('x', [10, 20]), ('y', ['a', 'b', 'c'])]) + arr = xr.DataArray( + np.random.randn(2, 3), coords=[("x", [10, 20]), ("y", ["a", "b", "c"])] + ) df = arr.to_pandas() df @@ -136,9 +143,10 @@ preserve all use of multi-indexes: .. ipython:: python - index = pd.MultiIndex.from_arrays([['a', 'a', 'b'], [0, 1, 2]], - names=['one', 'two']) - df = pd.DataFrame({'x': 1, 'y': 2}, index=index) + index = pd.MultiIndex.from_arrays( + [["a", "a", "b"], [0, 1, 2]], names=["one", "two"] + ) + df = pd.DataFrame({"x": 1, "y": 2}, index=index) ds = xr.Dataset(df) ds @@ -175,9 +183,9 @@ Let's take a look: .. ipython:: python data = np.random.RandomState(0).rand(2, 3, 4) - items = list('ab') - major_axis = list('mno') - minor_axis = pd.date_range(start='2000', periods=4, name='date') + items = list("ab") + major_axis = list("mno") + minor_axis = pd.date_range(start="2000", periods=4, name="date") With old versions of pandas (prior to 0.25), this could stored in a ``Panel``: @@ -207,7 +215,7 @@ You can also easily convert this data into ``Dataset``: .. ipython:: python - array.to_dataset(dim='dim_0') + array.to_dataset(dim="dim_0") Here, there are two data variables, each representing a DataFrame on panel's ``items`` axis, and labeled as such. Each variable is a 2D array of the diff --git a/doc/plotting.rst b/doc/plotting.rst index f3d9c0213de..fb30417e2c6 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -56,6 +56,7 @@ Imports # Use defaults so we don't get gridlines in generated docs import matplotlib as mpl + mpl.rcdefaults() The following imports are necessary for all of the examples. @@ -71,7 +72,7 @@ For these examples we'll use the North American air temperature dataset. .. ipython:: python - airtemps = xr.tutorial.open_dataset('air_temperature') + airtemps = xr.tutorial.open_dataset("air_temperature") airtemps # Convert to celsius @@ -79,7 +80,7 @@ For these examples we'll use the North American air temperature dataset. # copy attributes to get nice figure labels and change Kelvin to Celsius air.attrs = airtemps.air.attrs - air.attrs['units'] = 'deg C' + air.attrs["units"] = "deg C" .. note:: Until :issue:`1614` is solved, you might need to copy over the metadata in ``attrs`` to get informative figure labels (as was done above). @@ -126,7 +127,7 @@ can be used: .. ipython:: python @savefig plotting_1d_additional_args.png width=4in - air1d[:200].plot.line('b-^') + air1d[:200].plot.line("b-^") .. note:: Not all xarray plotting methods support passing positional arguments @@ -138,7 +139,7 @@ Keyword arguments work the same way, and are more explicit. .. 
ipython:: python @savefig plotting_example_sin3.png width=4in - air1d[:200].plot.line(color='purple', marker='o') + air1d[:200].plot.line(color="purple", marker="o") ========================= Adding to Existing Axis @@ -219,7 +220,7 @@ plots to check the variation of air temperature at three different latitudes alo .. ipython:: python @savefig plotting_example_multiple_lines_x_kwarg.png - air.isel(lon=10, lat=[19,21,22]).plot.line(x='time') + air.isel(lon=10, lat=[19, 21, 22]).plot.line(x="time") It is required to explicitly specify either @@ -240,7 +241,7 @@ It is also possible to make line plots such that the data are on the x-axis and .. ipython:: python @savefig plotting_example_xy_kwarg.png - air.isel(time=10, lon=[10, 11]).plot(y='lat', hue='lon') + air.isel(time=10, lon=[10, 11]).plot(y="lat", hue="lon") ============ Step plots @@ -253,7 +254,7 @@ made using 1D data. :okwarning: @savefig plotting_example_step.png width=4in - air1d[:20].plot.step(where='mid') + air1d[:20].plot.step(where="mid") The argument ``where`` defines where the steps should be placed, options are ``'pre'`` (default), ``'post'``, and ``'mid'``. This is particularly handy @@ -261,15 +262,15 @@ when plotting data grouped with :py:meth:`Dataset.groupby_bins`. .. ipython:: python - air_grp = air.mean(['time','lon']).groupby_bins('lat',[0,23.5,66.5,90]) + air_grp = air.mean(["time", "lon"]).groupby_bins("lat", [0, 23.5, 66.5, 90]) air_mean = air_grp.mean() air_std = air_grp.std() air_mean.plot.step() - (air_mean + air_std).plot.step(ls=':') - (air_mean - air_std).plot.step(ls=':') - plt.ylim(-20,30) + (air_mean + air_std).plot.step(ls=":") + (air_mean - air_std).plot.step(ls=":") + plt.ylim(-20, 30) @savefig plotting_example_step_groupby.png width=4in - plt.title('Zonal mean temperature') + plt.title("Zonal mean temperature") In this case, the actual boundaries of the bins are used and the ``where`` argument is ignored. @@ -284,7 +285,9 @@ The keyword arguments ``xincrease`` and ``yincrease`` let you control the axes d .. ipython:: python @savefig plotting_example_xincrease_yincrease_kwarg.png - air.isel(time=10, lon=[10, 11]).plot.line(y='lat', hue='lon', xincrease=False, yincrease=False) + air.isel(time=10, lon=[10, 11]).plot.line( + y="lat", hue="lon", xincrease=False, yincrease=False + ) In addition, one can use ``xscale, yscale`` to set axes scaling; ``xticks, yticks`` to set axes ticks and ``xlim, ylim`` to set axes limits. These accept the same values as the matplotlib methods ``Axes.set_(x,y)scale()``, ``Axes.set_(x,y)ticks()``, ``Axes.set_(x,y)lim()`` respectively. @@ -348,7 +351,7 @@ produce plots with nonuniform coordinates. b = air2d.copy() # Apply a nonlinear transformation to one of the coords - b.coords['lat'] = np.log(b.coords['lat']) + b.coords["lat"] = np.log(b.coords["lat"]) @savefig plotting_nonuniform_coords.png width=4in b.plot() @@ -363,9 +366,9 @@ matplotlib is available. .. ipython:: python air2d.plot(cmap=plt.cm.Blues) - plt.title('These colors prove North America\nhas fallen in the ocean') - plt.ylabel('latitude') - plt.xlabel('longitude') + plt.title("These colors prove North America\nhas fallen in the ocean") + plt.ylabel("latitude") + plt.xlabel("longitude") plt.tight_layout() @savefig plotting_2d_call_matplotlib.png width=4in @@ -381,7 +384,7 @@ matplotlib is available. .. 
ipython:: python - plt.xlabel('Never gonna see this.') + plt.xlabel("Never gonna see this.") air2d.plot() @savefig plotting_2d_call_matplotlib2.png width=4in @@ -473,10 +476,10 @@ if using ``imshow`` or ``pcolormesh`` (but not with ``contour`` or ``contourf``, since levels are chosen automatically). .. ipython:: python - :okwarning: + :okwarning: @savefig plotting_seaborn_palette.png width=4in - air2d.plot(levels=10, cmap='husl') + air2d.plot(levels=10, cmap="husl") plt.draw() .. _plotting.faceting: @@ -520,14 +523,16 @@ arguments to the xarray plotting methods/functions. This returns a .. ipython:: python @savefig plot_facet_dataarray.png - g_simple = t.plot(x='lon', y='lat', col='time', col_wrap=3) + g_simple = t.plot(x="lon", y="lat", col="time", col_wrap=3) Faceting also works for line plots. .. ipython:: python @savefig plot_facet_dataarray_line.png - g_simple_line = t.isel(lat=slice(0,None,4)).plot(x='lon', hue='lat', col='time', col_wrap=3) + g_simple_line = t.isel(lat=slice(0, None, 4)).plot( + x="lon", hue="lat", col="time", col_wrap=3 + ) =============== 4 dimensional @@ -541,12 +546,12 @@ one were much hotter. .. ipython:: python t2 = t.isel(time=slice(0, 2)) - t4d = xr.concat([t2, t2 + 40], pd.Index(['normal', 'hot'], name='fourth_dim')) + t4d = xr.concat([t2, t2 + 40], pd.Index(["normal", "hot"], name="fourth_dim")) # This is a 4d array t4d.coords @savefig plot_facet_4d.png - t4d.plot(x='lon', y='lat', col='time', row='fourth_dim') + t4d.plot(x="lon", y="lat", col="time", row="fourth_dim") ================ Other features @@ -555,9 +560,9 @@ one were much hotter. Faceted plotting supports other arguments common to xarray 2d plots. .. ipython:: python - :suppress: + :suppress: - plt.close('all') + plt.close("all") .. ipython:: python @@ -566,9 +571,15 @@ Faceted plotting supports other arguments common to xarray 2d plots. hasoutliers[-1, -1, -1] = 400 @savefig plot_facet_robust.png - g = hasoutliers.plot.pcolormesh('lon', 'lat', col='time', col_wrap=3, - robust=True, cmap='viridis', - cbar_kwargs={'label': 'this has outliers'}) + g = hasoutliers.plot.pcolormesh( + "lon", + "lat", + col="time", + col_wrap=3, + robust=True, + cmap="viridis", + cbar_kwargs={"label": "this has outliers"}, + ) =================== FacetGrid Objects @@ -594,20 +605,20 @@ It's possible to select the :py:class:`xarray.DataArray` or .. ipython:: python - g.data.loc[g.name_dicts[0, 0]] + g.data.loc[g.name_dicts[0, 0]] Here is an example of using the lower level API and then modifying the axes after they have been plotted. .. ipython:: python - g = t.plot.imshow('lon', 'lat', col='time', col_wrap=3, robust=True) + g = t.plot.imshow("lon", "lat", col="time", col_wrap=3, robust=True) for i, ax in enumerate(g.axes.flat): - ax.set_title('Air Temperature %d' % i) + ax.set_title("Air Temperature %d" % i) bottomright = g.axes[-1, -1] - bottomright.annotate('bottom right', (240, 40)) + bottomright.annotate("bottom right", (240, 40)) @savefig plot_facet_iterator.png plt.draw() @@ -632,8 +643,8 @@ Consider this dataset .. ipython:: python - ds = xr.tutorial.scatter_example_dataset() - ds + ds = xr.tutorial.scatter_example_dataset() + ds Suppose we want to scatter ``A`` against ``B`` @@ -641,14 +652,14 @@ Suppose we want to scatter ``A`` against ``B`` .. ipython:: python @savefig ds_simple_scatter.png - ds.plot.scatter(x='A', y='B') + ds.plot.scatter(x="A", y="B") The ``hue`` kwarg lets you vary the color by variable value .. 
ipython:: python @savefig ds_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w') + ds.plot.scatter(x="A", y="B", hue="w") When ``hue`` is specified, a colorbar is added for numeric ``hue`` DataArrays by default and a legend is added for non-numeric ``hue`` DataArrays (as above). @@ -659,21 +670,21 @@ Additionally, the boolean kwarg ``add_guide`` can be used to prevent the display ds = ds.assign(w=[1, 2, 3, 5]) @savefig ds_discrete_legend_hue_scatter.png - ds.plot.scatter(x='A', y='B', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", hue="w", hue_style="discrete") The ``markersize`` kwarg lets you vary the point's size by variable value. You can additionally pass ``size_norm`` to control how the variable's values are mapped to point sizes. .. ipython:: python @savefig ds_hue_size_scatter.png - ds.plot.scatter(x='A', y='B', hue='z', hue_style='discrete', markersize='z') + ds.plot.scatter(x="A", y="B", hue="z", hue_style="discrete", markersize="z") Faceting is also possible .. ipython:: python @savefig ds_facet_scatter.png - ds.plot.scatter(x='A', y='B', col='x', row='z', hue='w', hue_style='discrete') + ds.plot.scatter(x="A", y="B", col="x", row="z", hue="w", hue_style="discrete") For more advanced scatter plots, we recommend converting the relevant data variables to a pandas DataFrame and using the extensive plotting capabilities of ``seaborn``. @@ -691,11 +702,15 @@ This script will plot the air temperature on a map. .. ipython:: python import cartopy.crs as ccrs - air = xr.tutorial.open_dataset('air_temperature').air + + air = xr.tutorial.open_dataset("air_temperature").air + ax = plt.axes(projection=ccrs.Orthographic(-80, 35)) - air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()); + air.isel(time=0).plot.contourf(ax=ax, transform=ccrs.PlateCarree()) + ax.set_global() + @savefig plotting_maps_cartopy.png width=100% - ax.set_global(); ax.coastlines(); + ax.coastlines() When faceting on maps, the projection can be transferred to the ``plot`` function using the ``subplot_kws`` keyword. The axes for the subplots created @@ -703,13 +718,16 @@ by faceting are accessible in the object returned by ``plot``: .. ipython:: python - p = air.isel(time=[0, 4]).plot(transform=ccrs.PlateCarree(), col='time', - subplot_kws={'projection': ccrs.Orthographic(-80, 35)}) + p = air.isel(time=[0, 4]).plot( + transform=ccrs.PlateCarree(), + col="time", + subplot_kws={"projection": ccrs.Orthographic(-80, 35)}, + ) for ax in p.axes.flat: ax.coastlines() ax.gridlines() @savefig plotting_maps_cartopy_facetting.png width=100% - plt.draw(); + plt.draw() Details @@ -732,6 +750,7 @@ These are provided for user convenience; they all call the same code. .. ipython:: python import xarray.plot as xplt + da = xr.DataArray(range(5)) fig, axes = plt.subplots(ncols=2, nrows=2) da.plot(ax=axes[0, 0]) @@ -766,8 +785,7 @@ read on. .. ipython:: python - a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=('y', 'x', 'z'), - name='temperature') + a0 = xr.DataArray(np.zeros((4, 3, 2)), dims=("y", "x", "z"), name="temperature") a0[0, 0, 0] = 1 a = a0.isel(z=0) a @@ -801,14 +819,16 @@ instead of the default ones: .. 
ipython:: python lon, lat = np.meshgrid(np.linspace(-20, 20, 5), np.linspace(0, 30, 4)) - lon += lat/10 - lat += lon/10 - da = xr.DataArray(np.arange(20).reshape(4, 5), dims=['y', 'x'], - coords = {'lat': (('y', 'x'), lat), - 'lon': (('y', 'x'), lon)}) + lon += lat / 10 + lat += lon / 10 + da = xr.DataArray( + np.arange(20).reshape(4, 5), + dims=["y", "x"], + coords={"lat": (("y", "x"), lat), "lon": (("y", "x"), lon)}, + ) @savefig plotting_example_2d_irreg.png width=4in - da.plot.pcolormesh('lon', 'lat'); + da.plot.pcolormesh("lon", "lat") Note that in this case, xarray still follows the pixel centered convention. This might be undesirable in some cases, for example when your data is defined @@ -818,22 +838,25 @@ this convention when plotting on a map: .. ipython:: python import cartopy.crs as ccrs - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) You can however decide to infer the cell boundaries and use the ``infer_intervals`` keyword: .. ipython:: python - ax = plt.subplot(projection=ccrs.PlateCarree()); - da.plot.pcolormesh('lon', 'lat', ax=ax, infer_intervals=True); - ax.scatter(lon, lat, transform=ccrs.PlateCarree()); + ax = plt.subplot(projection=ccrs.PlateCarree()) + da.plot.pcolormesh("lon", "lat", ax=ax, infer_intervals=True) + ax.scatter(lon, lat, transform=ccrs.PlateCarree()) + ax.coastlines() @savefig plotting_example_2d_irreg_map_infer.png width=4in - ax.coastlines(); ax.gridlines(draw_labels=True); + ax.gridlines(draw_labels=True) .. note:: The data model of xarray does not support datasets with `cell boundaries`_ @@ -847,6 +870,6 @@ One can also make line plots with multidimensional coordinates. In this case, `` .. ipython:: python f, ax = plt.subplots(2, 1) - da.plot.line(x='lon', hue='y', ax=ax[0]); + da.plot.line(x="lon", hue="y", ax=ax[0]) @savefig plotting_example_2d_hue_xy.png - da.plot.line(x='lon', hue='x', ax=ax[1]); + da.plot.line(x="lon", hue="x", ax=ax[1]) diff --git a/doc/quick-overview.rst b/doc/quick-overview.rst index 741b3d1a5fe..09b0d4c6fbb 100644 --- a/doc/quick-overview.rst +++ b/doc/quick-overview.rst @@ -22,16 +22,14 @@ array or list, with optional *dimensions* and *coordinates*: .. ipython:: python - data = xr.DataArray(np.random.randn(2, 3), - dims=('x', 'y'), - coords={'x': [10, 20]}) + data = xr.DataArray(np.random.randn(2, 3), dims=("x", "y"), coords={"x": [10, 20]}) data In this case, we have generated a 2D array, assigned the names *x* and *y* to the two dimensions respectively and associated two *coordinate labels* '10' and '20' with the two locations along the x dimension. If you supply a pandas :py:class:`~pandas.Series` or :py:class:`~pandas.DataFrame`, metadata is copied directly: .. ipython:: python - xr.DataArray(pd.Series(range(3), index=list('abc'), name='foo')) + xr.DataArray(pd.Series(range(3), index=list("abc"), name="foo")) Here are the key properties for a ``DataArray``: @@ -75,13 +73,13 @@ While you're setting up your DataArray, it's often a good idea to set metadata a .. 
ipython:: python - data.attrs['long_name'] = 'random velocity' - data.attrs['units'] = 'metres/sec' - data.attrs['description'] = 'A random variable created as an example.' - data.attrs['random_attribute'] = 123 + data.attrs["long_name"] = "random velocity" + data.attrs["units"] = "metres/sec" + data.attrs["description"] = "A random variable created as an example." + data.attrs["random_attribute"] = 123 data.attrs # you can add metadata to coordinates too - data.x.attrs['units'] = 'x units' + data.x.attrs["units"] = "x units" Computation @@ -102,15 +100,15 @@ numbers: .. ipython:: python - data.mean(dim='x') + data.mean(dim="x") Arithmetic operations broadcast based on dimension name. This means you don't need to insert dummy dimensions for alignment: .. ipython:: python - a = xr.DataArray(np.random.randn(3), [data.coords['y']]) - b = xr.DataArray(np.random.randn(4), dims='z') + a = xr.DataArray(np.random.randn(3), [data.coords["y"]]) + b = xr.DataArray(np.random.randn(4), dims="z") a b @@ -139,9 +137,9 @@ xarray supports grouped operations using a very similar API to pandas (see :ref: .. ipython:: python - labels = xr.DataArray(['E', 'F', 'E'], [data.coords['y']], name='labels') + labels = xr.DataArray(["E", "F", "E"], [data.coords["y"]], name="labels") labels - data.groupby(labels).mean('y') + data.groupby(labels).mean("y") data.groupby(labels).map(lambda x: x - x.min()) Plotting @@ -178,7 +176,7 @@ objects. You can think of it as a multi-dimensional generalization of the .. ipython:: python - ds = xr.Dataset({'foo': data, 'bar': ('x', [1, 2]), 'baz': np.pi}) + ds = xr.Dataset({"foo": data, "bar": ("x", [1, 2]), "baz": np.pi}) ds @@ -186,7 +184,7 @@ This creates a dataset with three DataArrays named ``foo``, ``bar`` and ``baz``. .. ipython:: python - ds['foo'] + ds["foo"] ds.foo @@ -216,14 +214,15 @@ You can directly read and write xarray objects to disk using :py:meth:`~xarray.D .. ipython:: python - ds.to_netcdf('example.nc') - xr.open_dataset('example.nc') + ds.to_netcdf("example.nc") + xr.open_dataset("example.nc") .. ipython:: python - :suppress: + :suppress: import os - os.remove('example.nc') + + os.remove("example.nc") It is common for datasets to be distributed across multiple files (commonly one file per timestep). xarray supports this use-case by providing the :py:meth:`~xarray.open_mfdataset` and the :py:meth:`~xarray.save_mfdataset` methods. For more, see :ref:`io`. diff --git a/doc/reshaping.rst b/doc/reshaping.rst index 465ca14dfc2..40de9ea799a 100644 --- a/doc/reshaping.rst +++ b/doc/reshaping.rst @@ -7,11 +7,12 @@ Reshaping and reorganizing data These methods allow you to reorganize .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Reordering dimensions @@ -23,9 +24,9 @@ ellipsis (`...`) can be use to represent all other dimensions: .. ipython:: python - ds = xr.Dataset({'foo': (('x', 'y', 'z'), [[[42]]]), 'bar': (('y', 'z'), [[24]])}) - ds.transpose('y', 'z', 'x') - ds.transpose(..., 'x') # equivalent + ds = xr.Dataset({"foo": (("x", "y", "z"), [[[42]]]), "bar": (("y", "z"), [[24]])}) + ds.transpose("y", "z", "x") + ds.transpose(..., "x") # equivalent ds.transpose() # reverses all dimensions Expand and squeeze dimensions @@ -37,7 +38,7 @@ use :py:meth:`~xarray.DataArray.expand_dims` .. ipython:: python - expanded = ds.expand_dims('w') + expanded = ds.expand_dims("w") expanded This method attaches a new dimension with size 1 to all data variables. 
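
A detail worth noting, sketched briefly (the labels below are made up): ``expand_dims``
can also be given a mapping, in which case the new dimension gets an explicit length or
coordinate labels and the existing data are broadcast along it.

.. ipython:: python

    ds.expand_dims({"w": ["low", "high"]})
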
@@ -48,7 +49,7 @@ use :py:meth:`~xarray.DataArray.squeeze` .. ipython:: python - expanded.squeeze('w') + expanded.squeeze("w") Converting between datasets and arrays -------------------------------------- @@ -69,14 +70,14 @@ To convert back from a DataArray to a Dataset, use .. ipython:: python - arr.to_dataset(dim='variable') + arr.to_dataset(dim="variable") The broadcasting behavior of ``to_array`` means that the resulting array includes the union of data variable dimensions: .. ipython:: python - ds2 = xr.Dataset({'a': 0, 'b': ('x', [3, 4, 5])}) + ds2 = xr.Dataset({"a": 0, "b": ("x", [3, 4, 5])}) # the input dataset has 4 elements ds2 @@ -90,7 +91,7 @@ If you use ``to_dataset`` without supplying the ``dim`` argument, the DataArray .. ipython:: python - arr.to_dataset(name='combined') + arr.to_dataset(name="combined") .. _reshape.stack: @@ -103,11 +104,12 @@ implemented :py:meth:`~xarray.DataArray.stack` and .. ipython:: python - array = xr.DataArray(np.random.randn(2, 3), - coords=[('x', ['a', 'b']), ('y', [0, 1, 2])]) - stacked = array.stack(z=('x', 'y')) + array = xr.DataArray( + np.random.randn(2, 3), coords=[("x", ["a", "b"]), ("y", [0, 1, 2])] + ) + stacked = array.stack(z=("x", "y")) stacked - stacked.unstack('z') + stacked.unstack("z") As elsewhere in xarray, an ellipsis (`...`) can be used to represent all unlisted dimensions: @@ -128,15 +130,15 @@ possible levels. Missing levels are filled in with ``NaN`` in the resulting obje stacked2 = stacked[::2] stacked2 - stacked2.unstack('z') + stacked2.unstack("z") However, xarray's ``stack`` has an important difference from pandas: unlike pandas, it does not automatically drop missing values. Compare: .. ipython:: python - array = xr.DataArray([[np.nan, 1], [2, 3]], dims=['x', 'y']) - array.stack(z=('x', 'y')) + array = xr.DataArray([[np.nan, 1], [2, 3]], dims=["x", "y"]) + array.stack(z=("x", "y")) array.to_pandas().stack() We departed from pandas's behavior here because predictable shapes for new @@ -166,16 +168,15 @@ like this: .. ipython:: python - data = xr.Dataset( - data_vars={'a': (('x', 'y'), [[0, 1, 2], [3, 4, 5]]), - 'b': ('x', [6, 7])}, - coords={'y': ['u', 'v', 'w']} - ) - data - stacked = data.to_stacked_array("z", sample_dims=['x']) - stacked - unstacked = stacked.to_unstacked_dataset("z") - unstacked + data = xr.Dataset( + data_vars={"a": (("x", "y"), [[0, 1, 2], [3, 4, 5]]), "b": ("x", [6, 7])}, + coords={"y": ["u", "v", "w"]}, + ) + data + stacked = data.to_stacked_array("z", sample_dims=["x"]) + stacked + unstacked = stacked.to_unstacked_dataset("z") + unstacked In this example, ``stacked`` is a two dimensional array that we can easily pass to a scikit-learn or another generic numerical method. @@ -202,19 +203,23 @@ coordinates using :py:meth:`~xarray.DataArray.set_index`: .. ipython:: python - da = xr.DataArray(np.random.rand(4), - coords={'band': ('x', ['a', 'a', 'b', 'b']), - 'wavenumber': ('x', np.linspace(200, 400, 4))}, - dims='x') - da - mda = da.set_index(x=['band', 'wavenumber']) - mda + da = xr.DataArray( + np.random.rand(4), + coords={ + "band": ("x", ["a", "a", "b", "b"]), + "wavenumber": ("x", np.linspace(200, 400, 4)), + }, + dims="x", + ) + da + mda = da.set_index(x=["band", "wavenumber"]) + mda These coordinates can now be used for indexing, e.g., .. ipython:: python - mda.sel(band='a') + mda.sel(band="a") Conversely, you can use :py:meth:`~xarray.DataArray.reset_index` to extract multi-index levels as coordinates (this is mainly useful @@ -222,14 +227,14 @@ for serialization): .. 
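
One concrete reason serialization comes up here (a sketch; the file name is just a
placeholder): a ``MultiIndex`` coordinate cannot be written to netCDF directly, so the
usual workaround is to reset it to plain coordinates first.

.. ipython:: python

    # writing ``mda`` as-is would raise, because its MultiIndex cannot be stored
    # in a netCDF file; after reset_index the levels are ordinary coordinates
    mda.reset_index("x").to_netcdf("mda_flat.nc")

The reset on its own looks like this:

..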
ipython:: python - mda.reset_index('x') + mda.reset_index("x") :py:meth:`~xarray.DataArray.reorder_levels` allows changing the order of multi-index levels: .. ipython:: python - mda.reorder_levels(x=['wavenumber', 'band']) + mda.reorder_levels(x=["wavenumber", "band"]) As of xarray v0.9 coordinate labels for each dimension are optional. You can also use ``.set_index`` / ``.reset_index`` to add / remove @@ -237,12 +242,12 @@ labels for one or several dimensions: .. ipython:: python - array = xr.DataArray([1, 2, 3], dims='x') + array = xr.DataArray([1, 2, 3], dims="x") array - array['c'] = ('x', ['a', 'b', 'c']) - array.set_index(x='c') - array = array.set_index(x='c') - array = array.reset_index('x', drop=True) + array["c"] = ("x", ["a", "b", "c"]) + array.set_index(x="c") + array = array.set_index(x="c") + array = array.reset_index("x", drop=True) .. _reshape.shift_and_roll: @@ -254,9 +259,9 @@ To adjust coordinate labels, you can use the :py:meth:`~xarray.Dataset.shift` an .. ipython:: python - array = xr.DataArray([1, 2, 3, 4], dims='x') - array.shift(x=2) - array.roll(x=2, roll_coords=True) + array = xr.DataArray([1, 2, 3, 4], dims="x") + array.shift(x=2) + array.roll(x=2, roll_coords=True) .. _reshape.sort: @@ -269,17 +274,18 @@ One may sort a DataArray/Dataset via :py:meth:`~xarray.DataArray.sortby` and .. ipython:: python - ds = xr.Dataset({'A': (('x', 'y'), [[1, 2], [3, 4]]), - 'B': (('x', 'y'), [[5, 6], [7, 8]])}, - coords={'x': ['b', 'a'], 'y': [1, 0]}) - dax = xr.DataArray([100, 99], [('x', [0, 1])]) - day = xr.DataArray([90, 80], [('y', [0, 1])]) - ds.sortby([day, dax]) + ds = xr.Dataset( + {"A": (("x", "y"), [[1, 2], [3, 4]]), "B": (("x", "y"), [[5, 6], [7, 8]])}, + coords={"x": ["b", "a"], "y": [1, 0]}, + ) + dax = xr.DataArray([100, 99], [("x", [0, 1])]) + day = xr.DataArray([90, 80], [("y", [0, 1])]) + ds.sortby([day, dax]) As a shortcut, you can refer to existing coordinates by name: .. ipython:: python - ds.sortby('x') - ds.sortby(['y', 'x']) - ds.sortby(['y', 'x'], ascending=False) + ds.sortby("x") + ds.sortby(["y", "x"]) + ds.sortby(["y", "x"], ascending=False) \ No newline at end of file diff --git a/doc/time-series.rst b/doc/time-series.rst index d838dbbd4cd..96a2edc0ea5 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -10,11 +10,12 @@ data in pandas such a joy to xarray. In most cases, we rely on pandas for the core functionality. .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xr + np.random.seed(123456) Creating datetime64 data @@ -29,8 +30,8 @@ using :py:func:`pandas.to_datetime` and :py:func:`pandas.date_range`: .. ipython:: python - pd.to_datetime(['2000-01-01', '2000-02-02']) - pd.date_range('2000-01-01', periods=365) + pd.to_datetime(["2000-01-01", "2000-02-02"]) + pd.date_range("2000-01-01", periods=365) Alternatively, you can supply arrays of Python ``datetime`` objects. These get converted automatically when used as arguments in xarray objects: @@ -38,7 +39,8 @@ converted automatically when used as arguments in xarray objects: .. ipython:: python import datetime - xr.Dataset({'time': datetime.datetime(2000, 1, 1)}) + + xr.Dataset({"time": datetime.datetime(2000, 1, 1)}) When reading or writing netCDF files, xarray automatically decodes datetime and timedelta arrays using `CF conventions`_ (that is, by using a ``units`` @@ -62,8 +64,8 @@ You can manual decode arrays in this form by passing a dataset to .. 
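
In practice this mostly comes up when a file was opened with decoding switched off; a
short sketch, with a placeholder file name:

.. ipython::
    :verbatim:

    In [1]: raw = xr.open_dataset("cf_times.nc", decode_times=False)

    In [2]: xr.decode_cf(raw)  # numeric time axis becomes datetime64

Building the undecoded form by hand makes the same mechanics explicit:

..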
ipython:: python - attrs = {'units': 'hours since 2000-01-01'} - ds = xr.Dataset({'time': ('time', [0, 1, 2, 3], attrs)}) + attrs = {"units": "hours since 2000-01-01"} + ds = xr.Dataset({"time": ("time", [0, 1, 2, 3], attrs)}) xr.decode_cf(ds) One unfortunate limitation of using ``datetime64[ns]`` is that it limits the @@ -87,10 +89,10 @@ items and with the `slice` object: .. ipython:: python - time = pd.date_range('2000-01-01', freq='H', periods=365 * 24) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 24)), 'time': time}) - ds.sel(time='2000-01') - ds.sel(time=slice('2000-06-01', '2000-06-10')) + time = pd.date_range("2000-01-01", freq="H", periods=365 * 24) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 24)), "time": time}) + ds.sel(time="2000-01") + ds.sel(time=slice("2000-06-01", "2000-06-10")) You can also select a particular time by indexing with a :py:class:`datetime.time` object: @@ -113,8 +115,8 @@ given ``DataArray`` can be quickly computed using a special ``.dt`` accessor. .. ipython:: python - time = pd.date_range('2000-01-01', freq='6H', periods=365 * 4) - ds = xr.Dataset({'foo': ('time', np.arange(365 * 4)), 'time': time}) + time = pd.date_range("2000-01-01", freq="6H", periods=365 * 4) + ds = xr.Dataset({"foo": ("time", np.arange(365 * 4)), "time": time}) ds.time.dt.hour ds.time.dt.dayofweek @@ -130,16 +132,16 @@ __ http://pandas.pydata.org/pandas-docs/stable/api.html#time-date-components .. ipython:: python - ds['time.month'] - ds['time.dayofyear'] + ds["time.month"] + ds["time.dayofyear"] For use as a derived coordinate, xarray adds ``'season'`` to the list of datetime components supported by pandas: .. ipython:: python - ds['time.season'] - ds['time'].dt.season + ds["time.season"] + ds["time"].dt.season The set of valid seasons consists of 'DJF', 'MAM', 'JJA' and 'SON', labeled by the first letters of the corresponding months. @@ -152,7 +154,7 @@ __ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases .. ipython:: python - ds['time'].dt.floor('D') + ds["time"].dt.floor("D") The ``.dt`` accessor can also be used to generate formatted datetime strings for arrays utilising the same formatting as the standard `datetime.strftime`_. @@ -161,7 +163,7 @@ for arrays utilising the same formatting as the standard `datetime.strftime`_. .. ipython:: python - ds['time'].dt.strftime('%a, %b %d %H:%M') + ds["time"].dt.strftime("%a, %b %d %H:%M") .. _resampling: @@ -173,9 +175,9 @@ Datetime components couple particularly well with grouped operations (see calculate the mean by time of day: .. ipython:: python - :okwarning: + :okwarning: - ds.groupby('time.hour').mean() + ds.groupby("time.hour").mean() For upsampling or downsampling temporal resolutions, xarray offers a :py:meth:`~xarray.Dataset.resample` method building on the core functionality @@ -187,25 +189,25 @@ same api as ``resample`` `in pandas`_. For example, we can downsample our dataset from hourly to 6-hourly: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H') + ds.resample(time="6H") This will create a specialized ``Resample`` object which saves information necessary for resampling. All of the reduction methods which work with ``Resample`` objects can also be used for resampling: .. ipython:: python - :okwarning: + :okwarning: - ds.resample(time='6H').mean() + ds.resample(time="6H").mean() You can also supply an arbitrary reduction function to aggregate over each resampling group: .. 
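
Any function that accepts an ``axis`` argument can be used here, so other NumPy
reductions, or your own callable, can be dropped in as well; a small sketch, assuming the
``ds`` constructed above:

.. ipython:: python

    # spread (max minus min) of ``foo`` within each resampling window
    ds.resample(time="6H").reduce(np.ptp)

The simplest case is passing ``np.mean`` directly:

..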
ipython:: python - ds.resample(time='6H').reduce(np.mean) + ds.resample(time="6H").reduce(np.mean) For upsampling, xarray provides six methods: ``asfreq``, ``ffill``, ``bfill``, ``pad``, ``nearest`` and ``interpolate``. ``interpolate`` extends ``scipy.interpolate.interp1d`` @@ -218,7 +220,7 @@ Data that has indices outside of the given ``tolerance`` are set to ``NaN``. .. ipython:: python - ds.resample(time='1H').nearest(tolerance='1H') + ds.resample(time="1H").nearest(tolerance="1H") For more examples of using grouped operations on a time dimension, see diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst index 768cf6556f9..1eb63d24630 100644 --- a/doc/weather-climate.rst +++ b/doc/weather-climate.rst @@ -4,7 +4,7 @@ Weather and climate data ======================== .. ipython:: python - :suppress: + :suppress: import xarray as xr @@ -56,11 +56,14 @@ coordinate with dates from a no-leap calendar and a .. ipython:: python - from itertools import product - from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in - product(range(1, 3), range(1, 13))] - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + from itertools import product + from cftime import DatetimeNoLeap + + dates = [ + DatetimeNoLeap(year, month, 1) + for year, month in product(range(1, 3), range(1, 13)) + ] + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") xarray also includes a :py:func:`~xarray.cftime_range` function, which enables creating a :py:class:`~xarray.CFTimeIndex` with regularly-spaced dates. For @@ -68,8 +71,8 @@ instance, we can create the same dates and DataArray we created above using: .. ipython:: python - dates = xr.cftime_range(start='0001', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') + dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo") With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the @@ -80,8 +83,8 @@ using the same formatting as the standard `datetime.strftime`_ convention . .. ipython:: python - dates.strftime('%c') - da['time'].dt.strftime('%Y%m%d') + dates.strftime("%c") + da["time"].dt.strftime("%Y%m%d") For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: @@ -90,8 +93,8 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.sel(time='0001') - da.sel(time=slice('0001-05', '0002-02')) + da.sel(time="0001") + da.sel(time=slice("0001-05", "0002-02")) - Access of basic datetime components via the ``dt`` accessor (in this case just "year", "month", "day", "hour", "minute", "second", "microsecond", @@ -99,64 +102,65 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: .. ipython:: python - da.time.dt.year - da.time.dt.month - da.time.dt.season - da.time.dt.dayofyear - da.time.dt.dayofweek - da.time.dt.days_in_month + da.time.dt.year + da.time.dt.month + da.time.dt.season + da.time.dt.dayofyear + da.time.dt.dayofweek + da.time.dt.days_in_month - Rounding of datetimes to fixed frequencies via the ``dt`` accessor: .. 
ipython:: python - da.time.dt.ceil('3D') - da.time.dt.floor('5D') - da.time.dt.round('2D') + da.time.dt.ceil("3D") + da.time.dt.floor("5D") + da.time.dt.round("2D") - Group-by operations based on datetime accessor attributes (e.g. by month of the year): .. ipython:: python - da.groupby('time.month').sum() + da.groupby("time.month").sum() - Interpolation using :py:class:`cftime.datetime` objects: .. ipython:: python - da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) + da.interp(time=[DatetimeNoLeap(1, 1, 15), DatetimeNoLeap(1, 2, 15)]) - Interpolation using datetime strings: .. ipython:: python - da.interp(time=['0001-01-15', '0001-02-15']) + da.interp(time=["0001-01-15", "0001-02-15"]) - Differentiation: .. ipython:: python - da.differentiate('time') + da.differentiate("time") - Serialization: .. ipython:: python - da.to_netcdf('example-no-leap.nc') - xr.open_dataset('example-no-leap.nc') + da.to_netcdf("example-no-leap.nc") + xr.open_dataset("example-no-leap.nc") .. ipython:: python :suppress: import os - os.remove('example-no-leap.nc') + + os.remove("example-no-leap.nc") - And resampling along the time dimension for data indexed by a :py:class:`~xarray.CFTimeIndex`: .. ipython:: python - da.resample(time='81T', closed='right', label='right', base=3).mean() + da.resample(time="81T", closed="right", label="right", base=3).mean() .. note:: @@ -168,13 +172,13 @@ For data indexed by a :py:class:`~xarray.CFTimeIndex` xarray currently supports: method: .. ipython:: python - :okwarning: + :okwarning: - modern_times = xr.cftime_range('2000', periods=24, freq='MS', calendar='noleap') - da = xr.DataArray(range(24), [('time', modern_times)]) + modern_times = xr.cftime_range("2000", periods=24, freq="MS", calendar="noleap") + da = xr.DataArray(range(24), [("time", modern_times)]) da - datetimeindex = da.indexes['time'].to_datetimeindex() - da['time'] = datetimeindex + datetimeindex = da.indexes["time"].to_datetimeindex() + da["time"] = datetimeindex However in this case one should use caution to only perform operations which do not depend on differences between dates (e.g. differentiation, diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b71e0baa655..42e20bbf1bd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -4,13 +4,14 @@ What's New ========== .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd import xarray as xray import xarray import xarray as xr + np.random.seed(123456) .. _whats-new.0.16.0: @@ -109,6 +110,8 @@ Documentation of ``kwargs`` in :py:meth:`Dataset.interp` and :py:meth:`DataArray.interp` for 1-d and n-d interpolation (:pull:`3956`). By `Matthias Riße `_. +- Apply ``black`` to all the code in the documentation (:pull:`4012`) + By `Justus Magin `_. Internal Changes ~~~~~~~~~~~~~~~~ @@ -1965,8 +1968,8 @@ Enhancements .. ipython:: python - ds = xr.Dataset({'a': 1}) - np.sin(ds) + ds = xr.Dataset({"a": 1}) + np.sin(ds) This obliviates the need for the ``xarray.ufuncs`` module, which will be deprecated in the future when xarray drops support for older versions of @@ -2057,8 +2060,8 @@ Enhancements .. ipython:: python - da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims='x') - da.sum() + da = xr.DataArray(np.array([True, False, np.nan], dtype=object), dims="x") + da.sum() (:issue:`1866`) By `Keisuke Fujii `_. @@ -2212,7 +2215,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample('24H', dim='time', how='max') + In [1]: ds.resample("24H", dim="time", how="max") Out[1]: [...] 
@@ -2222,7 +2225,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: ds.resample(time='24H').max() + In [1]: ds.resample(time="24H").max() Out[1]: [...] @@ -2292,9 +2295,9 @@ Enhancements In [1]: import xarray as xr - In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=('x', 'y')) + In [2]: arr = xr.DataArray([[1, 2, 3], [4, 5, 6]], dims=("x", "y")) - In [3]: xr.where(arr % 2, 'even', 'odd') + In [3]: xr.where(arr % 2, "even", "odd") Out[3]: array([['even', 'odd', 'even'], @@ -2815,7 +2818,7 @@ Breaking changes .. ipython:: :verbatim: - In [1]: xr.Dataset({'foo': (('x', 'y'), [[1, 2]])}) + In [1]: xr.Dataset({"foo": (("x", "y"), [[1, 2]])}) Out[1]: Dimensions: (x: 1, y: 2) @@ -3272,10 +3275,10 @@ Enhancements .. ipython:: :verbatim: - In [1]: import xarray as xr; import numpy as np + In [1]: import xarray as xr + ...: import numpy as np - In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), - dims=('x', 'y')) + In [2]: arr = xr.DataArray(np.arange(0, 7.5, 0.5).reshape(3, 5), dims=("x", "y")) In [3]: arr Out[3]: @@ -3414,7 +3417,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3426,7 +3429,7 @@ Breaking changes .. ipython:: :verbatim: - In [2]: xray.DataArray([4, 5, 6], dims='x', name='x') + In [2]: xray.DataArray([4, 5, 6], dims="x", name="x") Out[2]: array([4, 5, 6]) @@ -3449,13 +3452,11 @@ Enhancements .. ipython:: :verbatim: - In [7]: df = pd.DataFrame({'foo': range(3), - ...: 'x': ['a', 'b', 'b'], - ...: 'y': [0, 0, 1]}) + In [7]: df = pd.DataFrame({"foo": range(3), "x": ["a", "b", "b"], "y": [0, 0, 1]}) - In [8]: s = df.set_index(['x', 'y'])['foo'] + In [8]: s = df.set_index(["x", "y"])["foo"] - In [12]: arr = xray.DataArray(s, dims='z') + In [12]: arr = xray.DataArray(s, dims="z") In [13]: arr Out[13]: @@ -3464,13 +3465,13 @@ Enhancements Coordinates: * z (z) object ('a', 0) ('b', 0) ('b', 1) - In [19]: arr.indexes['z'] + In [19]: arr.indexes["z"] Out[19]: MultiIndex(levels=[[u'a', u'b'], [0, 1]], labels=[[0, 1, 1], [0, 0, 1]], names=[u'x', u'y']) - In [14]: arr.unstack('z') + In [14]: arr.unstack("z") Out[14]: array([[ 0., nan], @@ -3479,7 +3480,7 @@ Enhancements * x (x) object 'a' 'b' * y (y) int64 0 1 - In [26]: arr.unstack('z').stack(z=('x', 'y')) + In [26]: arr.unstack("z").stack(z=("x", "y")) Out[26]: array([ 0., nan, 1., 2.]) @@ -3507,9 +3508,9 @@ Enhancements for shifting/rotating datasets or arrays along a dimension: .. ipython:: python - :okwarning: + :okwarning: - array = xray.DataArray([5, 6, 7, 8], dims='x') + array = xray.DataArray([5, 6, 7, 8], dims="x") array.shift(x=2) array.roll(x=2) @@ -3524,8 +3525,8 @@ Enhancements .. ipython:: python - a = xray.DataArray([1, 2, 3], dims='x') - b = xray.DataArray([5, 6], dims='y') + a = xray.DataArray([1, 2, 3], dims="x") + b = xray.DataArray([5, 6], dims="y") a b a2, b2 = xray.broadcast(a, b) @@ -3595,9 +3596,9 @@ Enhancements .. ipython:: :verbatim: - In [5]: array = xray.DataArray([1, 2, 3], dims='x') + In [5]: array = xray.DataArray([1, 2, 3], dims="x") - In [6]: array.reindex(x=[0.9, 1.5], method='nearest', tolerance=0.2) + In [6]: array.reindex(x=[0.9, 1.5], method="nearest", tolerance=0.2) Out[6]: array([ 2., nan]) @@ -3677,10 +3678,11 @@ Enhancements .. 
ipython:: :verbatim: - In [1]: da = xray.DataArray(np.arange(56).reshape((7, 8)), - ...: coords={'x': list('abcdefg'), - ...: 'y': 10 * np.arange(8)}, - ...: dims=['x', 'y']) + In [1]: da = xray.DataArray( + ...: np.arange(56).reshape((7, 8)), + ...: coords={"x": list("abcdefg"), "y": 10 * np.arange(8)}, + ...: dims=["x", "y"], + ...: ) In [2]: da Out[2]: @@ -3697,7 +3699,7 @@ Enhancements * x (x) |S1 'a' 'b' 'c' 'd' 'e' 'f' 'g' # we can index by position along each dimension - In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim='points') + In [3]: da.isel_points(x=[0, 1, 6], y=[0, 1, 0], dim="points") Out[3]: array([ 0, 9, 48]) @@ -3707,7 +3709,7 @@ Enhancements * points (points) int64 0 1 2 # or equivalently by label - In [9]: da.sel_points(x=['a', 'b', 'g'], y=[0, 10, 0], dim='points') + In [9]: da.sel_points(x=["a", "b", "g"], y=[0, 10, 0], dim="points") Out[9]: array([ 0, 9, 48]) @@ -3721,11 +3723,11 @@ Enhancements .. ipython:: python - ds = xray.Dataset(coords={'x': range(100), 'y': range(100)}) - ds['distance'] = np.sqrt(ds.x ** 2 + ds.y ** 2) + ds = xray.Dataset(coords={"x": range(100), "y": range(100)}) + ds["distance"] = np.sqrt(ds.x ** 2 + ds.y ** 2) - @savefig where_example.png width=4in height=4in - ds.distance.where(ds.distance < 100).plot() + @savefig where_example.png width=4in height=4in + ds.distance.where(ds.distance < 100).plot() - Added new methods ``xray.DataArray.diff`` and ``xray.Dataset.diff`` for finite difference calculations along a given axis. @@ -3735,9 +3737,9 @@ Enhancements .. ipython:: python - da = xray.DataArray(np.random.random_sample(size=(5, 4))) - da.where(da < 0.5) - da.where(da < 0.5).to_masked_array(copy=True) + da = xray.DataArray(np.random.random_sample(size=(5, 4))) + da.where(da < 0.5) + da.where(da < 0.5).to_masked_array(copy=True) - Added new flag "drop_variables" to ``xray.open_dataset`` for excluding variables from being parsed. This may be useful to drop @@ -3795,9 +3797,9 @@ Enhancements .. ipython:: :verbatim: - In [1]: years, datasets = zip(*ds.groupby('time.year')) + In [1]: years, datasets = zip(*ds.groupby("time.year")) - In [2]: paths = ['%s.nc' % y for y in years] + In [2]: paths = ["%s.nc" % y for y in years] In [3]: xray.save_mfdataset(datasets, paths) @@ -3870,9 +3872,9 @@ Backwards incompatible changes .. ipython:: :verbatim: - In [1]: ds = xray.Dataset({'x': 0}) + In [1]: ds = xray.Dataset({"x": 0}) - In [2]: xray.concat([ds, ds], dim='y') + In [2]: xray.concat([ds, ds], dim="y") Out[2]: Dimensions: () @@ -3884,13 +3886,13 @@ Backwards incompatible changes Now, the default always concatenates data variables: .. ipython:: python - :suppress: + :suppress: - ds = xray.Dataset({'x': 0}) + ds = xray.Dataset({"x": 0}) .. ipython:: python - xray.concat([ds, ds], dim='y') + xray.concat([ds, ds], dim="y") To obtain the old behavior, supply the argument ``concat_over=[]``. @@ -3903,17 +3905,20 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'a': 1, 'b': ('x', [1, 2, 3])}, - coords={'c': 42}, attrs={'Conventions': 'None'}) + ds = xray.Dataset( + {"a": 1, "b": ("x", [1, 2, 3])}, + coords={"c": 42}, + attrs={"Conventions": "None"}, + ) ds.to_array() - ds.to_array().to_dataset(dim='variable') + ds.to_array().to_dataset(dim="variable") - New ``xray.Dataset.fillna`` method to fill missing values, modeled off the pandas method of the same name: .. 
ipython:: python - array = xray.DataArray([np.nan, 1, np.nan, 3], dims='x') + array = xray.DataArray([np.nan, 1, np.nan, 3], dims="x") array.fillna(0) ``fillna`` works on both ``Dataset`` and ``DataArray`` objects, and uses @@ -3926,9 +3931,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'y': ('x', [1, 2, 3])}) - ds.assign(z = lambda ds: ds.y ** 2) - ds.assign_coords(z = ('x', ['a', 'b', 'c'])) + ds = xray.Dataset({"y": ("x", [1, 2, 3])}) + ds.assign(z=lambda ds: ds.y ** 2) + ds.assign_coords(z=("x", ["a", "b", "c"])) These methods return a new Dataset (or DataArray) with updated data or coordinate variables. @@ -3941,7 +3946,7 @@ Enhancements .. ipython:: :verbatim: - In [12]: ds.sel(x=1.1, method='nearest') + In [12]: ds.sel(x=1.1, method="nearest") Out[12]: Dimensions: () @@ -3950,7 +3955,7 @@ Enhancements Data variables: y int64 2 - In [13]: ds.sel(x=[1.1, 2.1], method='pad') + In [13]: ds.sel(x=[1.1, 2.1], method="pad") Out[13]: Dimensions: (x: 2) @@ -3976,7 +3981,7 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': np.arange(1000)}) + ds = xray.Dataset({"x": np.arange(1000)}) with xray.set_options(display_width=40): print(ds) @@ -4014,42 +4019,42 @@ Enhancements need to supply the time dimension explicitly: .. ipython:: python - :verbatim: + :verbatim: - time = pd.date_range('2000-01-01', freq='6H', periods=10) - array = xray.DataArray(np.arange(10), [('time', time)]) - array.resample('1D', dim='time') + time = pd.date_range("2000-01-01", freq="6H", periods=10) + array = xray.DataArray(np.arange(10), [("time", time)]) + array.resample("1D", dim="time") You can specify how to do the resampling with the ``how`` argument and other options such as ``closed`` and ``label`` let you control labeling: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='sum', label='right') + array.resample("1D", dim="time", how="sum", label="right") If the desired temporal resolution is higher than the original data (upsampling), xray will insert missing values: .. ipython:: python - :verbatim: + :verbatim: - array.resample('3H', 'time') + array.resample("3H", "time") - ``first`` and ``last`` methods on groupby objects let you take the first or last examples from each group along the grouped axis: .. ipython:: python - :verbatim: + :verbatim: - array.groupby('time.day').first() + array.groupby("time.day").first() These methods combine well with ``resample``: .. ipython:: python - :verbatim: + :verbatim: - array.resample('1D', dim='time', how='first') + array.resample("1D", dim="time", how="first") - ``xray.Dataset.swap_dims`` allows for easily swapping one dimension @@ -4057,9 +4062,9 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'x': range(3), 'y': ('x', list('abc'))}) - ds - ds.swap_dims({'x': 'y'}) + ds = xray.Dataset({"x": range(3), "y": ("x", list("abc"))}) + ds + ds.swap_dims({"x": "y"}) This was possible in earlier versions of xray, but required some contortions. - ``xray.open_dataset`` and ``xray.Dataset.to_netcdf`` now @@ -4105,8 +4110,8 @@ Breaking changes .. ipython:: python - lhs = xray.DataArray([1, 2, 3], [('x', [0, 1, 2])]) - rhs = xray.DataArray([2, 3, 4], [('x', [1, 2, 3])]) + lhs = xray.DataArray([1, 2, 3], [("x", [0, 1, 2])]) + rhs = xray.DataArray([2, 3, 4], [("x", [1, 2, 3])]) lhs + rhs :ref:`For dataset construction and merging`, we align based on the @@ -4114,14 +4119,14 @@ Breaking changes .. 
ipython:: python - xray.Dataset({'foo': lhs, 'bar': rhs}) + xray.Dataset({"foo": lhs, "bar": rhs}) :ref:`For update and __setitem__`, we align based on the **original** object: .. ipython:: python - lhs.coords['rhs'] = rhs + lhs.coords["rhs"] = rhs lhs - Aggregations like ``mean`` or ``median`` now skip missing values by default: @@ -4144,8 +4149,8 @@ Breaking changes .. ipython:: python - a = xray.DataArray([1, 2], coords={'c': 0}, dims='x') - b = xray.DataArray([1, 2], coords={'c': ('x', [0, 0])}, dims='x') + a = xray.DataArray([1, 2], coords={"c": 0}, dims="x") + b = xray.DataArray([1, 2], coords={"c": ("x", [0, 0])}, dims="x") (a + b).coords This functionality can be controlled through the ``compat`` option, which @@ -4156,9 +4161,10 @@ Breaking changes .. ipython:: python - time = xray.DataArray(pd.date_range('2000-01-01', periods=365), - dims='time', name='time') - counts = time.groupby('time.month').count() + time = xray.DataArray( + pd.date_range("2000-01-01", periods=365), dims="time", name="time" + ) + counts = time.groupby("time.month").count() counts.sel(month=2) Previously, you would need to use something like @@ -4168,8 +4174,8 @@ Breaking changes .. ipython:: python - ds = xray.Dataset({'t': pd.date_range('2000-01-01', periods=12, freq='M')}) - ds['t.season'] + ds = xray.Dataset({"t": pd.date_range("2000-01-01", periods=12, freq="M")}) + ds["t.season"] Previously, it returned numbered seasons 1 through 4. - We have updated our use of the terms of "coordinates" and "variables". What @@ -4192,8 +4198,8 @@ Enhancements .. ipython:: python - data = xray.DataArray([1, 2, 3], [('x', range(3))]) - data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method='pad') + data = xray.DataArray([1, 2, 3], [("x", range(3))]) + data.reindex(x=[0.5, 1, 1.5, 2, 2.5], method="pad") This will be especially useful once pandas 0.16 is released, at which point xray will immediately support reindexing with @@ -4212,15 +4218,15 @@ Enhancements makes it easy to drop explicitly listed variables or index labels: .. ipython:: python - :okwarning: + :okwarning: # drop variables - ds = xray.Dataset({'x': 0, 'y': 1}) - ds.drop('x') + ds = xray.Dataset({"x": 0, "y": 1}) + ds.drop("x") # drop index labels - arr = xray.DataArray([1, 2, 3], coords=[('x', list('abc'))]) - arr.drop(['a', 'c'], dim='x') + arr = xray.DataArray([1, 2, 3], coords=[("x", list("abc"))]) + arr.drop(["a", "c"], dim="x") - ``xray.Dataset.broadcast_equals`` has been added to correspond to the new ``compat`` option. @@ -4288,7 +4294,8 @@ Backwards incompatible changes .. ipython:: python from datetime import datetime - xray.Dataset({'t': [datetime(2000, 1, 1)]}) + + xray.Dataset({"t": [datetime(2000, 1, 1)]}) - xray now has support (including serialization to netCDF) for :py:class:`~pandas.TimedeltaIndex`. :py:class:`datetime.timedelta` objects @@ -4304,8 +4311,8 @@ Enhancements .. ipython:: python - ds = xray.Dataset({'tmin': ([], 25, {'units': 'celsius'})}) - ds.tmin.units + ds = xray.Dataset({"tmin": ([], 25, {"units": "celsius"})}) + ds.tmin.units Tab-completion for these variables should work in editors such as IPython. However, setting variables or attributes in this fashion is not yet @@ -4315,7 +4322,7 @@ Enhancements .. 
ipython:: python - array = xray.DataArray(np.zeros(5), dims=['x']) + array = xray.DataArray(np.zeros(5), dims=["x"]) array[dict(x=slice(3))] = 1 array diff --git a/xarray/core/common.py b/xarray/core/common.py index 1e7069ec51f..e343f342040 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -447,7 +447,7 @@ def assign_coords(self, coords=None, **coords_kwargs): New coordinate can also be attached to an existing dimension: >>> lon_2 = np.array([300, 289, 0, 1]) - >>> da.assign_coords(lon_2=('lon', lon_2)) + >>> da.assign_coords(lon_2=("lon", lon_2)) array([0.28298 , 0.667347, 0.657938, 0.177683]) Coordinates: @@ -456,7 +456,7 @@ def assign_coords(self, coords=None, **coords_kwargs): Note that the same result can also be obtained with a dict e.g. - >>> _ = da.assign_coords({"lon_2": ('lon', lon_2)}) + >>> _ = da.assign_coords({"lon_2": ("lon", lon_2)}) Notes ----- diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index ffa05ca64f0..5ced7e251c4 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3495,17 +3495,18 @@ def pad( Examples -------- - >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0,1,2])]) - >>> arr.pad(x=(1,2), constant_values=0) + >>> arr = xr.DataArray([5, 6, 7], coords=[("x", [0, 1, 2])]) + >>> arr.pad(x=(1, 2), constant_values=0) array([0, 5, 6, 7, 0, 0]) Coordinates: * x (x) float64 nan 0.0 1.0 2.0 nan nan - >>> da = xr.DataArray([[0,1,2,3], [10,11,12,13]], - dims=["x", "y"], - coords={"x": [0,1], "y": [10, 20 ,30, 40], "z": ("x", [100, 200])} - ) + >>> da = xr.DataArray( + ... [[0, 1, 2, 3], [10, 11, 12, 13]], + ... dims=["x", "y"], + ... coords={"x": [0, 1], "y": [10, 20, 30, 40], "z": ("x", [100, 200])}, + ... ) >>> da.pad(x=1) array([[nan, nan, nan, nan], @@ -3592,8 +3593,9 @@ def idxmin( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.min() array(-2) @@ -3604,13 +3606,15 @@ def idxmin( array('e', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... ) >>> array.min(dim="x") array([-2., -4., 1.]) @@ -3686,8 +3690,9 @@ def idxmax( Examples -------- - >>> array = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) + >>> array = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) >>> array.max() array(2) @@ -3698,13 +3703,15 @@ def idxmax( array('b', dtype='>> array = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": np.arange(5.)**2} - ... ) + >>> array = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": np.arange(5.0) ** 2}, + ... 
) >>> array.max(dim="x") array([2., 2., 1.]) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 53aa00f22ce..dd7871eaf3a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1055,9 +1055,7 @@ def copy(self, deep: bool = False, data: Mapping = None) -> "Dataset": structure of the original object, but with the new data. Original object is unaffected. - >>> ds.copy( - ... data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]} - ... ) + >>> ds.copy(data={"foo": np.arange(6).reshape(2, 3), "bar": ["a", "b"]}) Dimensions: (dim_0: 2, dim_1: 3, x: 2) Coordinates: @@ -6061,8 +6059,8 @@ def pad( Examples -------- - >>> ds = xr.Dataset({'foo': ('x', range(5))}) - >>> ds.pad(x=(1,2)) + >>> ds = xr.Dataset({"foo": ("x", range(5))}) + >>> ds.pad(x=(1, 2)) Dimensions: (x: 8) Dimensions without coordinates: x @@ -6156,17 +6154,20 @@ def idxmin( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.min(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... ) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.min(dim="x") Dimensions: (y: 3) Coordinates: @@ -6174,7 +6175,7 @@ def idxmin( Data variables: int int64 -2 float (y) float64 -2.0 -4.0 1.0 - >>> ds.argmin(dim='x') + >>> ds.argmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6182,7 +6183,7 @@ def idxmin( Data variables: int int64 4 float (y) int64 4 0 2 - >>> ds.idxmin(dim='x') + >>> ds.idxmin(dim="x") Dimensions: (y: 3) Coordinates: @@ -6251,17 +6252,20 @@ def idxmax( Examples -------- - >>> array1 = xr.DataArray([0, 2, 1, 0, -2], dims="x", - ... coords={"x": ['a', 'b', 'c', 'd', 'e']}) - >>> array2 = xr.DataArray([[2.0, 1.0, 2.0, 0.0, -2.0], - ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], - ... [np.NaN, np.NaN, 1., np.NaN, np.NaN]], - ... dims=["y", "x"], - ... coords={"y": [-1, 0, 1], - ... "x": ['a', 'b', 'c', 'd', 'e']} - ... ) - >>> ds = xr.Dataset({'int': array1, 'float': array2}) - >>> ds.max(dim='x') + >>> array1 = xr.DataArray( + ... [0, 2, 1, 0, -2], dims="x", coords={"x": ["a", "b", "c", "d", "e"]} + ... ) + >>> array2 = xr.DataArray( + ... [ + ... [2.0, 1.0, 2.0, 0.0, -2.0], + ... [-4.0, np.NaN, 2.0, np.NaN, -2.0], + ... [np.NaN, np.NaN, 1.0, np.NaN, np.NaN], + ... ], + ... dims=["y", "x"], + ... coords={"y": [-1, 0, 1], "x": ["a", "b", "c", "d", "e"]}, + ... 
) + >>> ds = xr.Dataset({"int": array1, "float": array2}) + >>> ds.max(dim="x") Dimensions: (y: 3) Coordinates: @@ -6269,7 +6273,7 @@ def idxmax( Data variables: int int64 2 float (y) float64 2.0 2.0 1.0 - >>> ds.argmax(dim='x') + >>> ds.argmax(dim="x") Dimensions: (y: 3) Coordinates: @@ -6277,7 +6281,7 @@ def idxmax( Data variables: int int64 1 float (y) int64 0 2 2 - >>> ds.idxmax(dim='x') + >>> ds.idxmax(dim="x") Dimensions: (y: 3) Coordinates: From 3820fb77256682d909c1e41d962e29bec0edd62d Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 29 Apr 2020 18:12:23 +0200 Subject: [PATCH 07/71] Pint support for DataArray (#3643) * remove xfail marks from median and cumprod * remove all xfails not related to indexes or external packages * switch away from using assert_equal_with_units * use assert_allclose in a few cases instead * don't use a kwarg for searchsorted normally, this should work, but the documentation mismatches the implementation of searchsorted and names the keys as `keys` instead of `v` * move the tests for item into their own test function * move the searchsorted tests into their own test function * remove a wrapping pytest.param * treat objects implementing __array_function__ the same as ndarray * mark numpy.median as xfailing * remove the xfail marks for the all and any tests * use assert_units_equal to check the resulting units * don't attempt to use interpolate_na with int dtype arrays * update the xfail reason for DataArray.interpolate_na * xfail the compatible units bivariate_ufunc test and don't use 0 * combine and expand the reindex and interp tests * combine and expand the reindex_like and interp_like tests * xfail the quantile tests if pint is not recent enough * xfail the rolling tests * don't xfail combine_first it currently does not test indexing, so probably will need a new test for that. * use numpy's assert_allclose * don't add dimension coordinates if they're not necessary * add the PR to the list of related PRs * move the whats-new.rst entry to 0.16.0 * check for __array_ufunc__ to decide if the type is supported * xfail the bivariate ufunc tests * remove the check for __array_ufunc__ * skip the DataArray.identical tests * use pytest.param --- doc/whats-new.rst | 3 + xarray/tests/test_units.py | 520 ++++++++++++++++++++----------------- 2 files changed, 289 insertions(+), 234 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 42e20bbf1bd..051a41a57e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -47,6 +47,9 @@ New Features - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ +- More support for unit aware arrays with pint (:pull:`3643`) + By `Justus Magin `_. + - Allow plotting of boolean arrays. 
(:pull:`3766`) By `Marek Jacob `_ - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 2826dc2479c..5dd4a42cff0 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -1660,7 +1660,7 @@ def test_missing_value_fillna(self, unit, error): method("equals"), pytest.param( method("identical"), - marks=pytest.mark.skip(reason="behaviour of identical is unclear"), + marks=pytest.mark.skip(reason="behavior of identical is undecided"), ), ), ids=repr, @@ -1885,7 +1885,10 @@ def test_squeeze(self, dtype): method("coarsen", windows={"y": 2}, func=np.mean), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), pytest.param( method("rank", dim="x"), @@ -2161,8 +2164,8 @@ class TestDataArray: "with_dims", marks=pytest.mark.xfail(reason="units in indexes are not supported"), ), - pytest.param("with_coords"), - pytest.param("without_coords"), + "with_coords", + "without_coords", ), ) def test_init(self, variant, dtype): @@ -2224,21 +2227,17 @@ def test_repr(self, func, variant, dtype): @pytest.mark.parametrize( "func", ( - pytest.param( - function("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - function("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("all"), + function("any"), function("argmax"), function("argmin"), function("max"), function("mean"), pytest.param( function("median"), - marks=pytest.mark.xfail(reason="not implemented by xarray"), + marks=pytest.mark.xfail( + reason="median does not work with dataarrays yet" + ), ), function("min"), pytest.param( @@ -2249,18 +2248,9 @@ def test_repr(self, func, variant, dtype): function("std"), function("var"), function("cumsum"), - pytest.param( - function("cumprod"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("all"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), - pytest.param( - method("any"), - marks=pytest.mark.xfail(reason="not implemented by pint yet"), - ), + function("cumprod"), + method("all"), + method("any"), method("argmax"), method("argmin"), method("max"), @@ -2269,18 +2259,13 @@ def test_repr(self, func, variant, dtype): method("min"), pytest.param( method("prod"), - marks=pytest.mark.xfail( - reason="comparison of quantity with ndarrays in nanops not implemented" - ), + marks=pytest.mark.xfail(reason="not implemented by pint yet"), ), method("sum"), method("std"), method("var"), method("cumsum"), - pytest.param( - method("cumprod"), - marks=pytest.mark.xfail(reason="pint does not implement cumprod yet"), - ), + method("cumprod"), ), ids=repr, ) @@ -2296,7 +2281,8 @@ def test_aggregation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", @@ -2314,7 +2300,8 @@ def test_unary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", 
@@ -2333,7 +2320,8 @@ def test_binary_operations(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "comparison", @@ -2383,7 +2371,8 @@ def test_comparison_operations(self, comparison, unit, error, dtype): strip_units(convert_units(to_compare_with, expected_units)), ) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "units,error", @@ -2411,9 +2400,10 @@ def test_univariate_ufunc(self, units, error, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="xarray's `np.maximum` strips units") + @pytest.mark.xfail(reason="needs the type register system for __array_ufunc__") @pytest.mark.parametrize( "unit,error", ( @@ -2422,7 +2412,12 @@ def test_univariate_ufunc(self, units, error, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.mm, None, id="compatible_unit"), + pytest.param( + unit_registry.mm, + None, + id="compatible_unit", + marks=pytest.mark.xfail(reason="pint converts to the wrong units"), + ), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2433,7 +2428,7 @@ def test_bivariate_ufunc(self, unit, error, dtype): if error is not None: with pytest.raises(error): - np.maximum(data_array, 0 * unit) + np.maximum(data_array, 1 * unit) return @@ -2441,16 +2436,18 @@ def test_bivariate_ufunc(self, unit, error, dtype): expected = attach_units( np.maximum( strip_units(data_array), - strip_units(convert_units(0 * unit, expected_units)), + strip_units(convert_units(1 * unit, expected_units)), ), expected_units, ) - actual = np.maximum(data_array, 0 * unit) - assert_equal_with_units(expected, actual) + actual = np.maximum(data_array, 1 * unit) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - actual = np.maximum(0 * unit, data_array) - assert_equal_with_units(expected, actual) + actual = np.maximum(1 * unit, data_array) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize("property", ("T", "imag", "real")) def test_numpy_properties(self, property, dtype): @@ -2466,7 +2463,8 @@ def test_numpy_properties(self, property, dtype): ) actual = getattr(data_array, property) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -2481,16 +2479,86 @@ def test_numpy_methods(self, func, dtype): expected = attach_units(strip_units(data_array), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) + + def test_item(self, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + func = method("item", 2) + + expected = func(strip_units(data_array)) * unit_registry.m + actual = func(data_array) + + np.testing.assert_allclose(expected, actual) + + @pytest.mark.parametrize( + "unit,error", + ( + pytest.param(1, DimensionalityError, id="no_unit"), + 
pytest.param( + unit_registry.dimensionless, DimensionalityError, id="dimensionless" + ), + pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.cm, None, id="compatible_unit"), + pytest.param(unit_registry.m, None, id="identical_unit"), + ), + ) + @pytest.mark.parametrize( + "func", + ( + method("searchsorted", 5), + pytest.param( + function("searchsorted", 5), + marks=pytest.mark.xfail( + reason="xarray does not implement __array_function__" + ), + ), + ), + ids=repr, + ) + def test_searchsorted(self, func, unit, error, dtype): + array = np.arange(10).astype(dtype) * unit_registry.m + data_array = xr.DataArray(data=array) + + scalar_types = (int, float) + args = list(value * unit for value in func.args) + kwargs = { + key: (value * unit if isinstance(value, scalar_types) else value) + for key, value in func.kwargs.items() + } + + if error is not None: + with pytest.raises(error): + func(data_array, *args, **kwargs) + + return + + units = extract_units(data_array) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] + stripped_kwargs = { + key: strip_units(convert_units(value, units)) + for key, value in kwargs.items() + } + expected = attach_units( + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, + ) + actual = func(data_array, *args, **kwargs) + + assert_units_equal(expected, actual) + np.testing.assert_allclose(expected, actual) @pytest.mark.parametrize( "func", ( method("clip", min=3, max=8), pytest.param( - method("searchsorted", v=5), + function("clip", a_min=3, a_max=8), marks=pytest.mark.xfail( - reason="searchsorted somehow requires a undocumented `keys` argument" + reason="xarray does not implement __array_function__" ), ), ), @@ -2513,28 +2581,32 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): data_array = xr.DataArray(data=array) scalar_types = (int, float) + args = list(value * unit for value in func.args) kwargs = { key: (value * unit if isinstance(value, scalar_types) else value) for key, value in func.kwargs.items() } if error is not None: with pytest.raises(error): - func(data_array, **kwargs) + func(data_array, *args, **kwargs) return units = extract_units(data_array) - expected_units = extract_units(func(array, **kwargs)) + expected_units = extract_units(func(array, *args, **kwargs)) + stripped_args = [strip_units(convert_units(value, units)) for value in args] stripped_kwargs = { key: strip_units(convert_units(value, units)) for key, value in kwargs.items() } expected = attach_units( - func(strip_units(data_array), **stripped_kwargs), expected_units + func(strip_units(data_array), *stripped_args, **stripped_kwargs), + expected_units, ) - actual = func(data_array, **kwargs) + actual = func(data_array, *args, **kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -2551,15 +2623,13 @@ def test_missing_value_detection(self, func, dtype): ) * unit_registry.degK ) - x = np.arange(array.shape[0]) * unit_registry.m - y = np.arange(array.shape[1]) * unit_registry.m - - data_array = xr.DataArray(data=array, coords={"x": x, "y": y}, dims=("x", "y")) + data_array = xr.DataArray(data=array) expected = func(strip_units(data_array)) actual = func(data_array) - assert_equal_with_units(expected, actual) + 
assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose units in data") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -2576,7 +2646,8 @@ def test_missing_value_filling(self, func, dtype): ) actual = func(data_array, dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2586,12 +2657,7 @@ def test_missing_value_filling(self, func, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="fillna converts to value's unit"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -2629,7 +2695,8 @@ def test_fillna(self, fill_value, unit, error, dtype): ) actual = func(data_array, value=value) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) def test_dropna(self, dtype): array = ( @@ -2643,18 +2710,13 @@ def test_dropna(self, dtype): expected = attach_units(strip_units(data_array).dropna(dim="x"), units) actual = data_array.dropna(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", ( - pytest.param( - 1, - id="no_unit", - marks=pytest.mark.xfail( - reason="pint's isin implementation does not work well with mixed args" - ), - ), + pytest.param(1, id="no_unit"), pytest.param(unit_registry.dimensionless, id="dimensionless"), pytest.param(unit_registry.s, id="incompatible_unit"), pytest.param(unit_registry.cm, id="compatible_unit"), @@ -2677,22 +2739,11 @@ def test_isin(self, unit, dtype): ) & array.check(unit) actual = data_array.isin(values) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( - "variant", - ( - pytest.param( - "masking", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - "replacing_scalar", - "replacing_array", - pytest.param( - "dropping", - marks=pytest.mark.xfail(reason="array(nan) is not a quantity"), - ), - ), + "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") ) @pytest.mark.parametrize( "unit,error", @@ -2742,22 +2793,24 @@ def test_where(self, variant, unit, error, dtype): ) actual = data_array.where(**kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="interpolate strips units") - def test_interpolate_na(self, dtype): + @pytest.mark.xfail(reason="uses numpy.vectorize") + def test_interpolate_na(self): array = ( np.array([-1.03, 0.1, 1.4, np.nan, 2.3, np.nan, np.nan, 9.1]) * unit_registry.m ) x = np.arange(len(array)) - data_array = xr.DataArray(data=array, coords={"x": x}, dims="x").astype(dtype) + data_array = xr.DataArray(data=array, coords={"x": x}, dims="x") units = extract_units(data_array) expected = attach_units(strip_units(data_array).interpolate_na(dim="x"), units) actual = data_array.interpolate_na(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) 
+ xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -2767,18 +2820,8 @@ def test_interpolate_na(self, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), - pytest.param( - unit_registry.m, - None, - id="identical_unit", - marks=pytest.mark.xfail(reason="depends on reindex"), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit",), + pytest.param(unit_registry.m, None, id="identical_unit",), ), ) def test_combine_first(self, unit, error, dtype): @@ -2807,7 +2850,8 @@ def test_combine_first(self, unit, error, dtype): ) actual = data_array.combine_first(other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2829,7 +2873,17 @@ def test_combine_first(self, unit, error, dtype): "coords", ), ) - @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) + @pytest.mark.parametrize( + "func", + ( + method("equals"), + pytest.param( + method("identical"), + marks=pytest.mark.skip(reason="the behavior of identical is undecided"), + ), + ), + ids=repr, + ) def test_comparisons(self, func, variation, unit, dtype): def is_compatible(a, b): a = a if a is not None else 1 @@ -2903,7 +2957,8 @@ def test_broadcast_like(self, unit, dtype): ) actual = arr1.broadcast_like(arr2) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "unit", @@ -2950,7 +3005,6 @@ def test_broadcast_equals(self, unit, dtype): method("reset_coords", names="x2"), method("copy"), method("astype", np.float32), - method("item", 1), ), ids=repr, ) @@ -2978,7 +3032,8 @@ def test_content_manipulation(self, func, dtype): expected = attach_units(func(strip_units(data_array), **stripped_kwargs), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", (pytest.param(method("copy", data=np.arange(20))),), ids=repr @@ -3004,7 +3059,9 @@ def test_content_manipulation_with_units(self, func, unit, dtype): ) actual = func(data_array, **kwargs) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "indices", @@ -3024,7 +3081,8 @@ def test_isel(self, indices, dtype): ) actual = data_array.isel(x=indices) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3067,7 +3125,9 @@ def test_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3110,7 +3170,9 @@ def test_loc(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.loc[{"x": values}] - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + 
xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -3153,7 +3215,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype): extract_units(data_array), ) actual = data_array.drop_sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "shape", @@ -3181,7 +3245,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(), extract_units(data_array) ) actual = data_array.squeeze() - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) # try squeezing the dimensions separately names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) @@ -3190,7 +3256,9 @@ def test_squeeze(self, shape, dtype): strip_units(data_array).squeeze(dim=name), extract_units(data_array) ) actual = data_array.squeeze(dim=name) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3212,49 +3280,42 @@ def test_head_tail_thin(self, func, dtype): ) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex"), ), + ids=repr, ) - def test_interp(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + y = np.arange(10) * coord_unit - if error is not None: - with pytest.raises(error): - data_array.interp(x=new_coords) - - return + x = np.arange(10) + new_x = np.arange(10) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", y)}, dims="x") units = extract_units(data_array) - expected = attach_units( - strip_units(data_array).interp( - x=strip_units(convert_units(new_coords, {None: unit_registry.m})) - ), - units, - ) - actual = data_array.interp(x=new_coords) + expected = attach_units(func(strip_units(data_array), x=new_x), units) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) - @pytest.mark.xfail(reason="indexes strip units") + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -3267,79 +3328,66 @@ def test_interp(self, unit, error): pytest.param(unit_registry.m, None, 
id="identical_unit"), ), ) - def test_interp_like(self, unit, error): - array = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } - - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + @pytest.mark.parametrize( + "func", (method("interp"), method("reindex")), ids=repr, + ) + def test_interp_reindex_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(10) + 0.5) * unit + data_array = xr.DataArray(array, coords={"x": x}, dims="x") if error is not None: with pytest.raises(error): - data_array.interp_like(other) + func(data_array, x=new_x) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).interp_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + x=strip_units(convert_units(new_x, {None: unit_registry.m})), ), units, ) - actual = data_array.interp_like(other) + actual = func(data_array, x=new_x) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex_like"), ), + ids=repr, ) - def test_reindex(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - new_coords = (np.arange(10) + 0.5) * unit - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, + def test_interp_reindex_like(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - func = method("reindex") - - if error is not None: - with pytest.raises(error): - func(data_array, x=new_coords) + array = np.linspace(1, 2, 10).astype(dtype) * data_unit + coord = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(-2, 2) + 0.5 + data_array = xr.DataArray(array, coords={"x": x, "y": ("x", coord)}, dims="x") + other = xr.DataArray(np.empty_like(new_x), coords={"x": new_x}, dims="x") - expected = attach_units( - func( - strip_units(data_array), - x=strip_units(convert_units(new_coords, {None: unit_registry.m})), - ), - {None: unit_registry.degK}, - ) - actual = func(data_array, x=new_coords) + units = extract_units(data_array) + expected = attach_units(func(strip_units(data_array), other), units) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") 
@pytest.mark.parametrize( @@ -3354,38 +3402,35 @@ def test_reindex(self, unit, error, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error, dtype): - array = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - coords = { - "x": (np.arange(10) + 0.3) * unit_registry.m, - "y": (np.arange(5) + 0.3) * unit_registry.m, - } + @pytest.mark.parametrize( + "func", (method("interp_like"), method("reindex_like")), ids=repr, + ) + def test_interp_reindex_like_indexing(self, func, unit, error, dtype): + array = np.linspace(1, 2, 10).astype(dtype) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(-2, 2) + 0.5) * unit - data_array = xr.DataArray(array, coords=coords, dims=("x", "y")) - other = xr.DataArray( - data=np.empty((20, 10)) * unit_registry.degK, - coords={"x": np.arange(20) * unit, "y": np.arange(10) * unit}, - dims=("x", "y"), - ) + data_array = xr.DataArray(array, coords={"x": x}, dims="x") + other = xr.DataArray(np.empty_like(new_x), {"x": new_x}, dims="x") if error is not None: with pytest.raises(error): - data_array.reindex_like(other) + func(data_array, other) return units = extract_units(data_array) expected = attach_units( - strip_units(data_array).reindex_like( - strip_units(convert_units(other, units)) + func( + strip_units(data_array), + strip_units(convert_units(other, {None: unit_registry.m})), ), units, ) - actual = data_array.reindex_like(other) + actual = func(data_array, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3407,7 +3452,8 @@ def test_stacking_stacked(self, func, dtype): expected = attach_units(func(strip_units(stacked)), {"data": unit_registry.m}) actual = func(stacked) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") def test_to_unstacked_dataset(self, dtype): @@ -3430,7 +3476,8 @@ def test_to_unstacked_dataset(self, dtype): ).rename({elem.magnitude: elem for elem in x}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3438,9 +3485,7 @@ def test_to_unstacked_dataset(self, dtype): method("transpose", "y", "x", "z"), method("stack", a=("x", "y")), method("set_index", x="x2"), - pytest.param( - method("shift", x=2), marks=pytest.mark.xfail(reason="strips units") - ), + method("shift", x=2), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), @@ -3466,7 +3511,8 @@ def test_stacking_reordering(self, func, dtype): expected = attach_units(func(strip_units(data_array)), {None: unit_registry.m}) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3476,16 +3522,13 @@ def test_stacking_reordering(self, func, dtype): method("integrate", dim="x"), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), - ), - method("reduce", func=np.sum, dim="x"), - pytest.param( - lambda x: x.dot(x), - id="method_dot", marks=pytest.mark.xfail( - reason="pint does not implement the dot method" + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not 
implemented yet", ), ), + method("reduce", func=np.sum, dim="x"), + pytest.param(lambda x: x.dot(x), id="method_dot"), ), ids=repr, ) @@ -3512,7 +3555,8 @@ def test_computation(self, func, dtype): expected = attach_units(func(strip_units(data_array)), units) actual = func(data_array) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3522,7 +3566,9 @@ def test_computation(self, func, dtype): method("coarsen", y=2), pytest.param( method("rolling", y=3), - marks=pytest.mark.xfail(reason="rolling strips units"), + marks=pytest.mark.xfail( + reason="numpy.lib.stride_tricks.as_strided converts to ndarray" + ), ), pytest.param( method("rolling_exp", y=3), @@ -3545,7 +3591,8 @@ def test_computation_objects(self, func, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_allclose(expected, actual) def test_resample(self, dtype): array = np.linspace(0, 5, 10).astype(dtype) * unit_registry.m @@ -3559,7 +3606,8 @@ def test_resample(self, dtype): expected = attach_units(func(strip_units(data_array)).mean(), units) actual = func(data_array).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize( "func", @@ -3569,7 +3617,10 @@ def test_resample(self, dtype): method("last"), pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) < "0.12", + reason="quantile / nanquantile not implemented yet", + ), ), ), ids=repr, @@ -3598,7 +3649,8 @@ def test_grouped_operations(self, func, dtype): ) actual = func(data_array.groupby("y")) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + xr.testing.assert_identical(expected, actual) class TestDataset: From 6ce07249ca7eabc181b2b88a5723e66fc06036d4 Mon Sep 17 00:00:00 2001 From: Maik Riechert Date: Sun, 3 May 2020 14:34:26 +0100 Subject: [PATCH 08/71] fix to_netcdf docstring typo (#4021) --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dd7871eaf3a..01dda828d8a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1535,7 +1535,7 @@ def to_netcdf( ``dask.delayed.Delayed`` object that can be computed later. invalid_netcdf: boolean Only valid along with engine='h5netcdf'. If True, allow writing - hdf5 files which are valid netcdf as described in + hdf5 files which are invalid netcdf as described in https://github.com/shoyer/h5netcdf. Default: False. 
""" if encoding is None: From 1b3c76863041d3265e5d011e68482944c447d78f Mon Sep 17 00:00:00 2001 From: Prajjwal Nijhara Date: Tue, 5 May 2020 07:27:30 +0530 Subject: [PATCH 09/71] chore: Remove unnecessary comprehension (#4026) * chore: Remove unnecessary comprehension * Update whats-new.rst --- doc/whats-new.rst | 4 ++++ xarray/core/groupby.py | 2 +- xarray/core/pdcompat.py | 2 +- xarray/core/variable.py | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 051a41a57e5..1993e543322 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -129,6 +129,10 @@ Internal Changes - Use ``async`` / ``await`` for the asynchronous distributed tests. (:issue:`3987`, :pull:`3989`) By `Justus Magin `_. +- Remove unnecessary comprehensions becuase the built-in functions like + ``all``, ``any``, ``enumerate``, ``sum``, ``tuple`` etc. can work directly with a + generator expression. (:pull:`4026`) + By `Prajjwal Nijhara `_. .. _whats-new.0.15.1: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 148e16863d1..85dd735c2fe 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -29,7 +29,7 @@ def check_reduce_dims(reduce_dims, dimensions): if reduce_dims is not ...: if is_scalar(reduce_dims): reduce_dims = [reduce_dims] - if any([dim not in dimensions for dim in reduce_dims]): + if any(dim not in dimensions for dim in reduce_dims): raise ValueError( "cannot reduce over dimensions %r. expected either '...' to reduce over all dimensions or one or more of %r." % (reduce_dims, dimensions) diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py index f2e4518e0dc..f2e22329fc8 100644 --- a/xarray/core/pdcompat.py +++ b/xarray/core/pdcompat.py @@ -55,4 +55,4 @@ def count_not_none(*args) -> int: Copied from pandas.core.common.count_not_none (not part of the public API) """ - return sum([arg is not None for arg in args]) + return sum(arg is not None for arg in args) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 68e823ca426..e19132b1b06 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2412,7 +2412,7 @@ def assert_unique_multiindex_level_names(variables): duplicate_names = [v for v in level_names.values() if len(v) > 1] if duplicate_names: - conflict_str = "\n".join([", ".join(v) for v in duplicate_names]) + conflict_str = "\n".join(", ".join(v) for v in duplicate_names) raise ValueError("conflicting MultiIndex level name(s):\n%s" % conflict_str) # Check confliction between level names and dimensions GH:2299 for k, v in variables.items(): From 1c5adc9fba6dcde73f31282719d3d8614e54f59b Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 5 May 2020 12:28:01 -0700 Subject: [PATCH 10/71] Support overriding existing variables in to_zarr() without appending (#4029) * Support overriding existing variables in to_zarr() without appending This should be useful for cases where users want to update values in existing Zarr datasets. 
* Update docstring for to_zarr --- doc/whats-new.rst | 4 ++- xarray/backends/api.py | 35 ++++++++++++++----- xarray/backends/zarr.py | 31 ++++++++++------- xarray/core/dataset.py | 13 +++---- xarray/tests/test_backends.py | 65 +++++++++++++---------------------- 5 files changed, 79 insertions(+), 69 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1993e543322..cdec7d81bbc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -49,7 +49,9 @@ New Features By `Todd Jennings `_ - More support for unit aware arrays with pint (:pull:`3643`) By `Justus Magin `_. - +- Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even + without ``append_dim``, as long as dimension sizes do not change. + By `Stephan Hoyer `_. - Allow plotting of boolean arrays. (:pull:`3766`) By `Marek Jacob `_ - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c7481e22b59..184aad579a2 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1279,18 +1279,35 @@ def _validate_append_dim_and_encoding( return if append_dim: if append_dim not in ds.dims: - raise ValueError(f"{append_dim} not a valid dimension in the Dataset") - for data_var in ds_to_append: - if data_var in ds: - if append_dim is None: + raise ValueError( + f"append_dim={append_dim!r} does not match any existing " + f"dataset dimensions {ds.dims}" + ) + for var_name in ds_to_append: + if var_name in ds: + if ds_to_append[var_name].dims != ds[var_name].dims: + raise ValueError( + f"variable {var_name!r} already exists with different " + f"dimension names {ds[var_name].dims} != " + f"{ds_to_append[var_name].dims}, but changing variable " + "dimensions is not supported by to_zarr()." + ) + existing_sizes = { + k: v for k, v in ds[var_name].sizes.items() if k != append_dim + } + new_sizes = { + k: v for k, v in ds_to_append[var_name].sizes.items() if k != append_dim + } + if existing_sizes != new_sizes: raise ValueError( - "variable '{}' already exists, but append_dim " - "was not set".format(data_var) + f"variable {var_name!r} already exists with different " + "dimension sizes: {existing_sizes} != {new_sizes}. " + "to_zarr() only supports changing dimension sizes when " + f"explicitly appending, but append_dim={append_dim!r}." 
) - if data_var in encoding.keys(): + if var_name in encoding.keys(): raise ValueError( - "variable '{}' already exists, but encoding was" - "provided".format(data_var) + f"variable {var_name!r} already exists, but encoding was provided" ) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 973c167911e..de6b627447e 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -445,18 +445,23 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No fill_value = attrs.pop("_FillValue", None) if v.encoding == {"_FillValue": None} and fill_value is None: v.encoding = {} - if name in self.ds: + + if self.append_dim is not None and self.append_dim in dims: + # resize existing variable zarr_array = self.ds[name] - if self.append_dim in dims: - # this is the DataArray that has append_dim as a - # dimension - append_axis = dims.index(self.append_dim) - new_shape = list(zarr_array.shape) - new_shape[append_axis] += v.shape[append_axis] - new_region = [slice(None)] * len(new_shape) - new_region[append_axis] = slice(zarr_array.shape[append_axis], None) - zarr_array.resize(new_shape) - writer.add(v.data, zarr_array, region=tuple(new_region)) + append_axis = dims.index(self.append_dim) + + new_region = [slice(None)] * len(dims) + new_region[append_axis] = slice(zarr_array.shape[append_axis], None) + region = tuple(new_region) + + new_shape = list(zarr_array.shape) + new_shape[append_axis] += v.shape[append_axis] + zarr_array.resize(new_shape) + elif name in self.ds: + # override existing variable + zarr_array = self.ds[name] + region = None else: # new variable encoding = extract_zarr_variable_encoding( @@ -474,7 +479,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No name, shape=shape, dtype=dtype, fill_value=fill_value, **encoding ) zarr_array.attrs.put(encoded_attrs) - writer.add(v.data, zarr_array) + region = None + + writer.add(v.data, zarr_array, region=region) def close(self): if self._consolidate_on_close: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 01dda828d8a..2a8b7bdbb9a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1579,7 +1579,7 @@ def to_zarr( mode : {'w', 'w-', 'a', None} Persistence mode: 'w' means create (overwrite if exists); 'w-' means create (fail if exists); - 'a' means append (create if does not exist). + 'a' means override existing variables (create if does not exist). If ``append_dim`` is set, ``mode`` can be omitted as it is internally set to ``'a'``. Otherwise, ``mode`` will default to `w-` if not set. @@ -1598,7 +1598,8 @@ def to_zarr( If True, apply zarr's `consolidate_metadata` function to the store after writing. append_dim: hashable, optional - If set, the dimension on which the data will be appended. + If set, the dimension along which the data will be appended. All + other dimensions on overriden variables must remain the same size. References ---------- @@ -1766,7 +1767,7 @@ def maybe_chunk(name, var, chunks): return self._replace(variables) def _validate_indexers( - self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise", + self, indexers: Mapping[Hashable, Any], missing_dims: str = "raise" ) -> Iterator[Tuple[Hashable, Union[int, slice, np.ndarray, Variable]]]: """ Here we make sure + indexer has a valid keys @@ -5933,7 +5934,7 @@ def polyfit( "The number of data points must exceed order to scale the covariance matrix." 
) fac = residuals / (x.shape[0] - order) - covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j"),) * fac + covariance = xr.DataArray(Vbase, dims=("cov_i", "cov_j")) * fac variables[name + "polyfit_covariance"] = covariance return Dataset(data_vars=variables, attrs=self.attrs.copy()) @@ -6199,7 +6200,7 @@ def idxmin( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) def idxmax( @@ -6297,7 +6298,7 @@ def idxmax( skipna=skipna, fill_value=fill_value, keep_attrs=keep_attrs, - ), + ) ) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 916c29ba7bd..90deea51d2a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1526,12 +1526,6 @@ def roundtrip( with self.open(store_target, **open_kwargs) as ds: yield ds - @contextlib.contextmanager - def roundtrip_append( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False - ): - pytest.skip("zarr backend does not support appending") - def test_roundtrip_consolidated(self): pytest.importorskip("zarr", minversion="2.2.1.dev2") expected = create_test_data() @@ -1826,7 +1820,7 @@ def test_encoding_kwarg_fixed_width_string(self): # not relevant for zarr, since we don't use EncodedStringCoder pass - # TODO: someone who understand caching figure out whether chaching + # TODO: someone who understand caching figure out whether caching # makes sense for Zarr backend @pytest.mark.xfail(reason="Zarr caching not implemented") def test_dataset_caching(self): @@ -1834,55 +1828,44 @@ def test_dataset_caching(self): @pytest.mark.skipif(LooseVersion(dask_version) < "2.4", reason="dask GH5334") def test_append_write(self): - ds, ds_to_append, _ = create_append_test_data() - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") - ds_to_append.to_zarr(store_target, append_dim="time") - original = xr.concat([ds, ds_to_append], dim="time") - assert_identical(original, xr.open_zarr(store_target)) - - @pytest.mark.xfail(reason="Zarr stores can not be appended to") - def test_append_overwrite_values(self): - super().test_append_overwrite_values() + super().test_append_write() def test_append_with_invalid_dim_raises(self): - ds, ds_to_append, _ = create_append_test_data() - - # check failure when append_dim not valid - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises( + ValueError, match="does not match any existing dataset dimensions" + ): ds_to_append.to_zarr(store_target, append_dim="notvalid") - def test_append_with_append_dim_not_set_raises(self): + def test_append_with_no_dims_raises(self): + with self.create_zarr_target() as store_target: + Dataset({"foo": ("x", [1])}).to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="different dimension names"): + Dataset({"foo": ("y", [2])}).to_zarr(store_target, mode="a") + def test_append_with_append_dim_not_set_raises(self): ds, ds_to_append, _ = create_append_test_data() - - # check failure when append_dim not set - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="different dimension sizes"): ds_to_append.to_zarr(store_target, mode="a") def test_append_with_mode_not_a_raises(self): - ds, ds_to_append, _ = 
create_append_test_data() - - # check failure when append_dim is set and mode != 'a' - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises( + ValueError, match="append_dim was set along with mode='w'" + ): ds_to_append.to_zarr(store_target, mode="w", append_dim="time") def test_append_with_existing_encoding_raises(self): - ds, ds_to_append, _ = create_append_test_data() - - # check failure when providing encoding to existing variable - with pytest.raises(ValueError): - with self.create_zarr_target() as store_target: - ds.to_zarr(store_target, mode="w") + with self.create_zarr_target() as store_target: + ds.to_zarr(store_target, mode="w") + with pytest.raises(ValueError, match="but encoding was provided"): ds_to_append.to_zarr( store_target, append_dim="time", From 59b470f5d1464366dc55b082618ea87da8fbc9af Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 5 May 2020 14:49:25 -0700 Subject: [PATCH 11/71] Allow warning with cartopy in docs plotting build (#4032) It looks like this is triggered by the new cartopy version now being installed on RTD (version 0.17.0 -> 0.18.0). Long term we should fix this, but for now it's better just to disable the warning. Here's the message from RTD: ``` Exception occurred: File "/home/docs/checkouts/readthedocs.org/user_builds/xray/conda/latest/lib/python3.8/site-packages/IPython/sphinxext/ipython_directive.py", line 586, in process_input raise RuntimeError('Non Expected warning in `{}` line {}'.format(filename, lineno)) RuntimeError: Non Expected warning in `/home/docs/checkouts/readthedocs.org/user_builds/xray/checkouts/latest/doc/plotting.rst` line 732 The full traceback has been saved in /tmp/sphinx-err-qav6jjmm.log, if you want to report the issue to the developers. Please also report this if it was a user error, so that a better error message can be provided next time. A bug report can be filed in the tracker at . Thanks! >>>------------------------------------------------------------------------- Warning in /home/docs/checkouts/readthedocs.org/user_builds/xray/checkouts/latest/doc/plotting.rst at block ending on line 732 Specify :okwarning: as an option in the ipython:: block to suppress this message ---------------------------------------------------------------------------- /home/docs/checkouts/readthedocs.org/user_builds/xray/checkouts/latest/xarray/plot/facetgrid.py:373: UserWarning: Tight layout not applied. The left and right margins cannot be made large enough to accommodate all axes decorations. self.fig.tight_layout() <<<------------------------------------------------------------------------- ``` https://readthedocs.org/projects/xray/builds/10969146/ --- doc/plotting.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/plotting.rst b/doc/plotting.rst index fb30417e2c6..40c0ca1a496 100644 --- a/doc/plotting.rst +++ b/doc/plotting.rst @@ -717,6 +717,7 @@ function using the ``subplot_kws`` keyword. The axes for the subplots created by faceting are accessible in the object returned by ``plot``: .. 
ipython:: python
+    :okwarning:
 
     p = air.isel(time=[0, 4]).plot(
         transform=ccrs.PlateCarree(),

From 9ec3f7b44d50ffa2298a9796847e69953ae96cbd Mon Sep 17 00:00:00 2001
From: Stephan Hoyer
Date: Tue, 5 May 2020 18:50:20 -0700
Subject: [PATCH 12/71] Remove broken test for Panel with to_pandas() (#4028)

* Remove broken test for Panel with to_pandas()

We don't support creating a Panel with to_pandas() with *any* version of
pandas at present, so this test was previously broken if pandas < 0.25
was installed.

* remove unused import

* Fixup LooseVersion import
---
 xarray/tests/test_dataset.py | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index a1cb7361e77..2a89920766c 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -32,7 +32,6 @@
 
 from . import (
     InaccessibleArray,
-    LooseVersion,
     UnexpectedDataAccess,
     assert_allclose,
     assert_array_equal,
@@ -496,16 +495,11 @@ def test_constructor_pandas_single(self):
             DataArray(np.random.rand(4, 3), dims=["a", "b"]),  # df
         ]
 
-        if LooseVersion(pd.__version__) < "0.25.0":
-            das.append(DataArray(np.random.rand(4, 3, 2), dims=["a", "b", "c"]))
-
-        with warnings.catch_warnings():
-            warnings.filterwarnings("ignore", r"\W*Panel is deprecated")
-            for a in das:
-                pandas_obj = a.to_pandas()
-                ds_based_on_pandas = Dataset(pandas_obj)
-                for dim in ds_based_on_pandas.data_vars:
-                    assert_array_equal(ds_based_on_pandas[dim], pandas_obj[dim])
+        for a in das:
+            pandas_obj = a.to_pandas()
+            ds_based_on_pandas = Dataset(pandas_obj)
+            for dim in ds_based_on_pandas.data_vars:
+                assert_array_equal(ds_based_on_pandas[dim], pandas_obj[dim])
 
     def test_constructor_compat(self):
         data = {"x": DataArray(0, coords={"y": 1}), "y": ("z", [1, 1, 1])}

From fe7962a7016dc9aa25e54cc857efa4aa52baed8a Mon Sep 17 00:00:00 2001
From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
Date: Wed, 6 May 2020 12:39:34 -0400
Subject: [PATCH 13/71] Transpose coords by default (#3824)

* transpose coords by default

* whatsnew

* Update doc/whats-new.rst

Co-authored-by: crusaderky

* Update whats-new.rst

Co-authored-by: crusaderky
---
 doc/whats-new.rst              |  6 ++++++
 xarray/core/dataarray.py       | 14 ++------------
 xarray/core/groupby.py         | 19 ++-----------------
 xarray/tests/test_dataarray.py |  6 ------
 4 files changed, 10 insertions(+), 35 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index cdec7d81bbc..1204155f062 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -21,6 +21,12 @@ v0.16.0 (unreleased)
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
+
+- ``groupby`` operations will restore coord dimension order. Pass ``restore_coord_dims=False``
+  to revert to previous behavior.
+- :meth:`DataArray.transpose` will now transpose coordinates by default.
+  Pass ``transpose_coords=False`` to revert to previous behaviour.
+ By `Maximilian Roos `_ - Alternate draw styles for :py:meth:`plot.step` must be passed using the ``drawstyle`` (or ``ds``) keyword argument, instead of the ``linestyle`` (or ``ls``) keyword argument, in line with the `upstream change in Matplotlib diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5ced7e251c4..fc9e3410247 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1,6 +1,5 @@ import datetime import functools -import warnings from numbers import Number from typing import ( TYPE_CHECKING, @@ -1915,7 +1914,7 @@ def to_unstacked_dataset(self, dim, level=0): # unstacked dataset return Dataset(data_dict) - def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArray": + def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArray": """Return a new DataArray object with transposed dimensions. Parameters @@ -1923,7 +1922,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra *dims : hashable, optional By default, reverse the dimensions. Otherwise, reorder the dimensions to this order. - transpose_coords : boolean, optional + transpose_coords : boolean, default True If True, also transpose the coordinates of this DataArray. Returns @@ -1952,15 +1951,6 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = None) -> "DataArra coords[name] = coord.variable.transpose(*coord_dims) return self._replace(variable, coords) else: - if transpose_coords is None and any(self[c].ndim > 1 for c in self.coords): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. In the future, these coordinates " - "will be transposed as well unless you specify " - "transpose_coords=False.", - FutureWarning, - stacklevel=2, - ) return self._replace(variable) @property diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 85dd735c2fe..299cb8ec4fa 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -272,7 +272,7 @@ def __init__( squeeze=False, grouper=None, bins=None, - restore_coord_dims=None, + restore_coord_dims=True, cut_kwargs=None, ): """Create a GroupBy object @@ -292,7 +292,7 @@ def __init__( bins : array-like, optional If `bins` is specified, the groups will be discretized into the specified bins by `pandas.cut`. - restore_coord_dims : bool, optional + restore_coord_dims : bool, default True If True, also restore the dimension order of multi-dimensional coordinates. cut_kwargs : dict, optional @@ -389,21 +389,6 @@ def __init__( "Failed to group data. Are you grouping by a variable that is all NaN?" ) - if ( - isinstance(obj, DataArray) - and restore_coord_dims is None - and any(obj[c].ndim > 1 for c in obj.coords) - ): - warnings.warn( - "This DataArray contains multi-dimensional " - "coordinates. 
In the future, the dimension order " - "of these coordinates will be restored as well " - "unless you specify restore_coord_dims=False.", - FutureWarning, - stacklevel=2, - ) - restore_coord_dims = False - # specification for the groupby operation self._obj = obj self._group = group diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index c3e5aafabfe..6984d5361d2 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2161,9 +2161,6 @@ def test_transpose(self): with pytest.raises(ValueError): da.transpose("x", "y") - with pytest.warns(FutureWarning): - da.transpose() - def test_squeeze(self): assert_equal(self.dv.variable.squeeze(), self.dv.squeeze().variable) @@ -2753,9 +2750,6 @@ def test_groupby_restore_coord_dims(self): )["c"] assert result.dims == expected_dims - with pytest.warns(FutureWarning): - array.groupby("x").map(lambda x: x.squeeze()) - def test_groupby_first_and_last(self): array = DataArray([1, 2, 3, 4, 5], dims="x") by = DataArray(["a"] * 2 + ["b"] * 3, dims="x", name="ab") From 0b6e22f6af71cc19d2e0575ecdf5c66109c314dd Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 6 May 2020 16:41:53 +0000 Subject: [PATCH 14/71] Add template xarray object kwarg to map_blocks (#3816) * Allow providing template dataset to map_blocks. * Update dimension shape check. This accounts for dimension sizes being changed by the applied function. * Allow user function to add new unindexed dimension. * Add docstring for template. * renaming * Raise nice error if adding a new chunked dimension, * Raise nice error message when expected dimension is missing on returned object * Revert "Allow user function to add new unindexed dimension." This reverts commit 045ae2b1bf939515e0a38c960d0cdc7974bcfa37. * Add test + fix output_chunks for dataarray template * typing * fix test * Add nice error messages when result doesn't match template. * blacken * Add template kwarg to DataArray.map_blocks & Dataset.map_blocks * minor error message fixes. * docstring updates. * bugfix for expected shapes when template is not specified * Add map_blocks docs. * Update doc/dask.rst Co-Authored-By: Joe Hamman * refactor out slicer for chunks * Check expected index values. * Raise nice error when template object does not have required number of chunks * doc updates. * more review comments. * Mention that attrs are taken from template. 
* Add test and explicit point out that attrs is copied from template Co-authored-by: Joe Hamman --- doc/api.rst | 5 +- doc/dask.rst | 114 +++++++++++++++++++++++- doc/whats-new.rst | 5 ++ xarray/core/dataarray.py | 30 ++++--- xarray/core/dataset.py | 30 ++++--- xarray/core/parallel.py | 180 +++++++++++++++++++++++++++++--------- xarray/tests/test_dask.py | 69 ++++++++++++++- 7 files changed, 359 insertions(+), 74 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index b37c84e7a81..8ec6843d24a 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -173,6 +173,7 @@ Computation Dataset.quantile Dataset.differentiate Dataset.integrate + Dataset.map_blocks Dataset.polyfit **Aggregation**: @@ -358,6 +359,8 @@ Computation DataArray.integrate DataArray.polyfit DataArray.str + DataArray.map_blocks + **Aggregation**: :py:attr:`~DataArray.all` @@ -518,7 +521,6 @@ Dataset methods Dataset.load Dataset.chunk Dataset.unify_chunks - Dataset.map_blocks Dataset.filter_by_attrs Dataset.info @@ -550,7 +552,6 @@ DataArray methods DataArray.load DataArray.chunk DataArray.unify_chunks - DataArray.map_blocks Coordinates objects =================== diff --git a/doc/dask.rst b/doc/dask.rst index 2248de9c0d8..df223982ba4 100644 --- a/doc/dask.rst +++ b/doc/dask.rst @@ -284,12 +284,21 @@ loaded into Dask or not: .. _dask.automatic-parallelization: -Automatic parallelization -------------------------- +Automatic parallelization with ``apply_ufunc`` and ``map_blocks`` +----------------------------------------------------------------- Almost all of xarray's built-in operations work on Dask arrays. If you want to -use a function that isn't wrapped by xarray, one option is to extract Dask -arrays from xarray objects (``.data``) and use Dask directly. +use a function that isn't wrapped by xarray, and have it applied in parallel on +each block of your xarray object, you have three options: + +1. Extract Dask arrays from xarray objects (``.data``) and use Dask directly. +2. Use :py:func:`~xarray.apply_ufunc` to apply functions that consume and return NumPy arrays. +3. Use :py:func:`~xarray.map_blocks`, :py:meth:`Dataset.map_blocks` or :py:meth:`DataArray.map_blocks` + to apply functions that consume and return xarray objects. + + +``apply_ufunc`` +~~~~~~~~~~~~~~~ Another option is to use xarray's :py:func:`~xarray.apply_ufunc`, which can automate `embarrassingly parallel @@ -400,6 +409,103 @@ application. structure of a problem, unlike the generic speedups offered by ``dask='parallelized'``. + +``map_blocks`` +~~~~~~~~~~~~~~ + +Functions that consume and return xarray objects can be easily applied in parallel using :py:func:`map_blocks`. +Your function will receive an xarray Dataset or DataArray subset to one chunk +along each chunked dimension. + +.. ipython:: python + + ds.temperature + +This DataArray has 3 chunks each with length 10 along the time dimension. +At compute time, a function applied with :py:func:`map_blocks` will receive a DataArray corresponding to a single block of shape 10x180x180 +(time x latitude x longitude) with values loaded. The following snippet illustrates how to check the shape of the object +received by the applied function. + +.. ipython:: python + + def func(da): + print(da.sizes) + return da.time + + mapped = xr.map_blocks(func, ds.temperature) + mapped + +Notice that the :py:meth:`map_blocks` call printed +``Frozen({'time': 0, 'latitude': 0, 'longitude': 0})`` to screen. +``func`` is received 0-sized blocks! 
:py:meth:`map_blocks` needs to know what the final result +looks like in terms of dimensions, shapes etc. It does so by running the provided function on 0-shaped +inputs (*automated inference*). This works in many cases, but not all. If automatic inference does not +work for your function, provide the ``template`` kwarg (see below). + +In this case, automatic inference has worked so let's check that the result is as expected. + +.. ipython:: python + + mapped.load(scheduler="single-threaded") + mapped.identical(ds.time) + +Note that we use ``.load(scheduler="single-threaded")`` to execute the computation. +This executes the Dask graph in `serial` using a for loop, but allows for printing to screen and other +debugging techniques. We can easily see that our function is receiving blocks of shape 10x180x180 and +the returned result is identical to ``ds.time`` as expected. + + +Here is a common example where automated inference will not work. + +.. ipython:: python + :okexcept: + + def func(da): + print(da.sizes) + return da.isel(time=[1]) + + mapped = xr.map_blocks(func, ds.temperature) + +``func`` cannot be run on 0-shaped inputs because it is not possible to extract element 1 along a +dimension of size 0. In this case we need to tell :py:func:`map_blocks` what the returned result looks +like using the ``template`` kwarg. ``template`` must be an xarray Dataset or DataArray (depending on +what the function returns) with dimensions, shapes, chunk sizes, attributes, coordinate variables *and* data +variables that look exactly like the expected result. The variables should be dask-backed and hence not +incur much memory cost. + +.. note:: + + Note that when ``template`` is provided, ``attrs`` from ``template`` are copied over to the result. Any + ``attrs`` set in ``func`` will be ignored. + + +.. ipython:: python + + template = ds.temperature.isel(time=[1, 11, 21]) + mapped = xr.map_blocks(func, ds.temperature, template=template) + + +Notice that the 0-shaped sizes were not printed to screen. Since ``template`` has been provided +:py:func:`map_blocks` does not need to infer it by running ``func`` on 0-shaped inputs. + +.. ipython:: python + + mapped.identical(template) + + +:py:func:`map_blocks` also allows passing ``args`` and ``kwargs`` down to the user function ``func``. +``func`` will be executed as ``func(block_xarray, *args, **kwargs)`` so ``args`` must be a list and ``kwargs`` must be a dictionary. + +.. ipython:: python + + def func(obj, a, b=0): + return obj + a + b + + mapped = ds.map_blocks(func, args=[10], kwargs={"b": 10}) + expected = ds + 10 + 10 + mapped.identical(expected) + + Chunking and performance ------------------------ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1204155f062..b22a7217568 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -67,6 +67,9 @@ New Features the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`). This feature requires cftime version 1.1.0 or greater. By `Spencer Clark `_. +- :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases + where the result of a computation could not be inferred automatically. + By `Deepak Cherian `_ Bug fixes ~~~~~~~~~ @@ -123,6 +126,8 @@ Documentation By `Matthias Riße `_. - Apply ``black`` to all the code in the documentation (:pull:`4012`) By `Justus Magin `_. +- Narrative documentation now describes :py:meth:`map_blocks`. :ref:`dask.automatic-parallelization`. + By `Deepak Cherian `_. 
Internal Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index fc9e3410247..236938bac74 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -3250,27 +3250,25 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this DataArray. This method is experimental - and its signature may change. + Apply a function to each block of this DataArray. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a DataArray as its first parameter. The - function will receive a subset of this DataArray, corresponding to one chunk - along each chunked dimension. ``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this array - but has sizes 0, to determine properties of the returned object such as - dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a DataArray as its first + parameter. The function will receive a subset, i.e. one block, of this DataArray + (see below), corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(block_subset, *args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. args: Sequence Passed verbatim to func after unpacking, after the sliced DataArray. xarray objects, if any, will not be split by chunks. Passing dask collections is @@ -3278,6 +3276,12 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. Returns ------- @@ -3300,7 +3304,7 @@ def map_blocks( """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2a8b7bdbb9a..3a55f3eca27 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5709,27 +5709,25 @@ def map_blocks( func: "Callable[..., T_DSorDA]", args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union["DataArray", "Dataset"] = None, ) -> "T_DSorDA": """ - Apply a function to each chunk of this Dataset. This method is experimental and - its signature may change. + Apply a function to each block of this Dataset. + + .. warning:: + This method is experimental and its signature may change. Parameters ---------- func: callable - User-provided function that accepts a Dataset as its first parameter. The - function will receive a subset of this Dataset, corresponding to one chunk - along each chunked dimension. 
``func`` will be executed as - ``func(obj_subset, *args, **kwargs)``. - - The function will be first run on mocked-up data, that looks like this - Dataset but has sizes 0, to determine properties of the returned object such - as dtype, variable names, new dimensions and new indexes (if any). + User-provided function that accepts a Dataset as its first + parameter. The function will receive a subset, i.e. one block, of this Dataset + (see below), corresponding to one chunk along each chunked dimension. ``func`` will be + executed as ``func(block_subset, *args, **kwargs)``. This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. args: Sequence Passed verbatim to func after unpacking, after the sliced DataArray. xarray objects, if any, will not be split by chunks. Passing dask collections is @@ -5737,6 +5735,12 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. Returns ------- @@ -5759,7 +5763,7 @@ def map_blocks( """ from .parallel import map_blocks - return map_blocks(func, self, args, kwargs) + return map_blocks(func, self, args, kwargs, template) def polyfit( self, diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index 6f1668f698f..d91dfb4a275 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -31,6 +31,30 @@ T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset) +def check_result_variables( + result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str +): + + if kind == "coords": + nice_str = "coordinate" + elif kind == "data_vars": + nice_str = "data" + + # check that coords and data variables are as expected + missing = expected[kind] - set(getattr(result, kind)) + if missing: + raise ValueError( + "Result from applying user function does not contain " + f"{nice_str} variables {missing}." + ) + extra = set(getattr(result, kind)) - expected[kind] + if extra: + raise ValueError( + "Result from applying user function has unexpected " + f"{nice_str} variables {extra}." + ) + + def dataset_to_dataarray(obj: Dataset) -> DataArray: if not isinstance(obj, Dataset): raise TypeError("Expected Dataset, got %s" % type(obj)) @@ -80,7 +104,8 @@ def infer_template( template = func(*meta_args, **kwargs) except Exception as e: raise Exception( - "Cannot infer object returned from running user provided function." + "Cannot infer object returned from running user provided function. " + "Please supply the 'template' kwarg to map_blocks." 
) from e if not isinstance(template, (Dataset, DataArray)): @@ -102,14 +127,24 @@ def make_dict(x: Union[DataArray, Dataset]) -> Dict[Hashable, Any]: return {k: v.data for k, v in x.variables.items()} +def _get_chunk_slicer(dim: Hashable, chunk_index: Mapping, chunk_bounds: Mapping): + if dim in chunk_index: + which_chunk = chunk_index[dim] + return slice(chunk_bounds[dim][which_chunk], chunk_bounds[dim][which_chunk + 1]) + return slice(None) + + def map_blocks( func: Callable[..., T_DSorDA], obj: Union[DataArray, Dataset], args: Sequence[Any] = (), kwargs: Mapping[str, Any] = None, + template: Union[DataArray, Dataset] = None, ) -> T_DSorDA: - """Apply a function to each chunk of a DataArray or Dataset. This function is - experimental and its signature may change. + """Apply a function to each block of a DataArray or Dataset. + + .. warning:: + This function is experimental and its signature may change. Parameters ---------- @@ -119,14 +154,10 @@ def map_blocks( corresponding to one chunk along each chunked dimension. ``func`` will be executed as ``func(obj_subset, *args, **kwargs)``. - The function will be first run on mocked-up data, that looks like 'obj' but - has sizes 0, to determine properties of the returned object such as dtype, - variable names, new dimensions and new indexes (if any). - This function must return either a single DataArray or a single Dataset. - This function cannot change size of existing dimensions, or add new chunked - dimensions. + This function cannot add a new chunked dimension. + obj: DataArray, Dataset Passed to the function as its first argument, one dask chunk at a time. args: Sequence @@ -135,6 +166,15 @@ def map_blocks( kwargs: Mapping Passed verbatim to func after unpacking. xarray objects, if any, will not be split by chunks. Passing dask collections is not allowed. + template: (optional) DataArray, Dataset + xarray object representing the final result after compute is called. If not provided, + the function will be first run on mocked-up data, that looks like 'obj' but + has sizes 0, to determine properties of the returned object such as dtype, + variable names, attributes, new dimensions and new indexes (if any). + 'template' must be provided if the function changes the size of existing dimensions. + When provided, `attrs` on variables in `template` are copied over to the result. Any + `attrs` set by `func` will be ignored. + Returns ------- @@ -201,22 +241,47 @@ def map_blocks( * time (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00 """ - def _wrapper(func, obj, to_array, args, kwargs): + def _wrapper(func, obj, to_array, args, kwargs, expected): + check_shapes = dict(obj.dims) + check_shapes.update(expected["shapes"]) + if to_array: obj = dataset_to_dataarray(obj) result = func(obj, *args, **kwargs) + # check all dims are present + missing_dimensions = set(expected["shapes"]) - set(result.sizes) + if missing_dimensions: + raise ValueError( + f"Dimensions {missing_dimensions} missing on returned object." + ) + + # check that index lengths and values are as expected for name, index in result.indexes.items(): - if name in obj.indexes: - if len(index) != len(obj.indexes[name]): + if name in check_shapes: + if len(index) != check_shapes[name]: raise ValueError( - "Length of the %r dimension has changed. This is not allowed." - % name + f"Received dimension {name!r} of length {len(index)}. Expected length {check_shapes[name]}." 
) + if name in expected["indexes"]: + expected_index = expected["indexes"][name] + if not index.equals(expected_index): + raise ValueError( + f"Expected index {name!r} to be {expected_index!r}. Received {index!r} instead." + ) + + # check that all expected variables were returned + check_result_variables(result, expected, "coords") + if isinstance(result, Dataset): + check_result_variables(result, expected, "data_vars") return make_dict(result) + if template is not None and not isinstance(template, (DataArray, Dataset)): + raise TypeError( + f"template must be a DataArray or Dataset. Received {type(template).__name__} instead." + ) if not isinstance(args, Sequence): raise TypeError("args must be a sequence (for example, a list or tuple).") if kwargs is None: @@ -248,8 +313,38 @@ def _wrapper(func, obj, to_array, args, kwargs): input_is_array = False input_chunks = dataset.chunks + dataset_indexes = set(dataset.indexes) + if template is None: + # infer template by providing zero-shaped arrays + template = infer_template(func, obj, *args, **kwargs) + template_indexes = set(template.indexes) + preserved_indexes = template_indexes & dataset_indexes + new_indexes = template_indexes - dataset_indexes + indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} + indexes.update({k: template.indexes[k] for k in new_indexes}) + output_chunks = { + dim: input_chunks[dim] for dim in template.dims if dim in input_chunks + } + + else: + # template xarray object has been provided with proper sizes and chunk shapes + template_indexes = set(template.indexes) + indexes = {dim: dataset.indexes[dim] for dim in dataset_indexes} + indexes.update({k: template.indexes[k] for k in template_indexes}) + if isinstance(template, DataArray): + output_chunks = dict(zip(template.dims, template.chunks)) # type: ignore + else: + output_chunks = template.chunks # type: ignore + + for dim in output_chunks: + if dim in input_chunks and len(input_chunks[dim]) != len(output_chunks[dim]): + raise ValueError( + "map_blocks requires that one block of the input maps to one block of output. " + f"Expected number of output chunks along dimension {dim!r} to be {len(input_chunks[dim])}. " + f"Received {len(output_chunks[dim])} instead. Please provide template if not provided, or " + "fix the provided template." + ) - template: Union[DataArray, Dataset] = infer_template(func, obj, *args, **kwargs) if isinstance(template, DataArray): result_is_array = True template_name = template.name @@ -261,13 +356,6 @@ def _wrapper(func, obj, to_array, args, kwargs): f"func output must be DataArray or Dataset; got {type(template)}" ) - template_indexes = set(template.indexes) - dataset_indexes = set(dataset.indexes) - preserved_indexes = template_indexes & dataset_indexes - new_indexes = template_indexes - dataset_indexes - indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes} - indexes.update({k: template.indexes[k] for k in new_indexes}) - # We're building a new HighLevelGraph hlg. We'll have one new layer # for each variable in the dataset, which is the result of the # func applied to the values. 
@@ -281,13 +369,16 @@ def _wrapper(func, obj, to_array, args, kwargs): # map dims to list of chunk indexes ichunk = {dim: range(len(chunks_v)) for dim, chunks_v in input_chunks.items()} # mapping from chunk index to slice bounds - chunk_index_bounds = { + input_chunk_bounds = { dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in input_chunks.items() } + output_chunk_bounds = { + dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in output_chunks.items() + } # iterate over all possible chunk combinations for v in itertools.product(*ichunk.values()): - chunk_index_dict = dict(zip(dataset.dims, v)) + chunk_index = dict(zip(dataset.dims, v)) # this will become [[name1, variable1], # [name2, variable2], @@ -302,9 +393,9 @@ def _wrapper(func, obj, to_array, args, kwargs): # recursively index into dask_keys nested list to get chunk chunk = variable.__dask_keys__() for dim in variable.dims: - chunk = chunk[chunk_index_dict[dim]] + chunk = chunk[chunk_index[dim]] - chunk_variable_task = (f"{gname}-{chunk[0]}",) + v + chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + v graph[chunk_variable_task] = ( tuple, [variable.dims, chunk, variable.attrs], @@ -312,15 +403,10 @@ def _wrapper(func, obj, to_array, args, kwargs): else: # non-dask array with possibly chunked dimensions # index into variable appropriately - subsetter = {} - for dim in variable.dims: - if dim in chunk_index_dict: - which_chunk = chunk_index_dict[dim] - subsetter[dim] = slice( - chunk_index_bounds[dim][which_chunk], - chunk_index_bounds[dim][which_chunk + 1], - ) - + subsetter = { + dim: _get_chunk_slicer(dim, chunk_index, input_chunk_bounds) + for dim in variable.dims + } subset = variable.isel(subsetter) chunk_variable_task = ( "{}-{}".format(gname, dask.base.tokenize(subset)), @@ -336,6 +422,20 @@ def _wrapper(func, obj, to_array, args, kwargs): else: data_vars.append([name, chunk_variable_task]) + # expected["shapes", "coords", "data_vars", "indexes"] are used to raise nice error messages in _wrapper + expected = {} + # input chunk 0 along a dimension maps to output chunk 0 along the same dimension + # even if length of dimension is changed by the applied function + expected["shapes"] = { + k: output_chunks[k][v] for k, v in chunk_index.items() if k in output_chunks + } + expected["data_vars"] = set(template.data_vars.keys()) # type: ignore + expected["coords"] = set(template.coords.keys()) # type: ignore + expected["indexes"] = { + dim: indexes[dim][_get_chunk_slicer(dim, chunk_index, output_chunk_bounds)] + for dim in indexes + } + from_wrapper = (gname,) + v graph[from_wrapper] = ( _wrapper, @@ -344,6 +444,7 @@ def _wrapper(func, obj, to_array, args, kwargs): input_is_array, args, kwargs, + expected, ) # mapping from variable name to dask graph key @@ -356,10 +457,11 @@ def _wrapper(func, obj, to_array, args, kwargs): key: Tuple[Any, ...] 
= (gname_l,) for dim in variable.dims: - if dim in chunk_index_dict: - key += (chunk_index_dict[dim],) + if dim in chunk_index: + key += (chunk_index[dim],) else: # unchunked dimensions in the input have one chunk in the result + # output can have new dimensions with exactly one chunk key += (0,) # We're adding multiple new layers to the graph: @@ -382,8 +484,8 @@ def _wrapper(func, obj, to_array, args, kwargs): dims = template[name].dims var_chunks = [] for dim in dims: - if dim in input_chunks: - var_chunks.append(input_chunks[dim]) + if dim in output_chunks: + var_chunks.append(output_chunks[dim]) elif dim in indexes: var_chunks.append((len(indexes[dim]),)) elif dim in template.dims: diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 538dbbfb58b..75beb3757ca 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -1039,7 +1039,7 @@ def test_map_blocks_error(map_da, map_ds): def bad_func(darray): return (darray * darray.x + 5 * darray.y)[:1, :1] - with raises_regex(ValueError, "Length of the.* has changed."): + with raises_regex(ValueError, "Received dimension 'x' of length 1"): xr.map_blocks(bad_func, map_da).compute() def returns_numpy(darray): @@ -1109,6 +1109,11 @@ def add_attrs(obj): assert_identical(actual, expected) + # when template is specified, attrs are copied from template, not set by function + with raise_if_dask_computes(): + actual = xr.map_blocks(add_attrs, obj, template=obj) + assert_identical(actual, obj) + def test_map_blocks_change_name(map_da): def change_name(obj): @@ -1150,7 +1155,7 @@ def test_map_blocks_to_array(map_ds): lambda x: x.expand_dims(k=3), lambda x: x.assign_coords(new_coord=("y", x.y * 2)), lambda x: x.astype(np.int32), - # TODO: [lambda x: x.isel(x=1).drop_vars("x"), map_da], + lambda x: x.x, ], ) def test_map_blocks_da_transformations(func, map_da): @@ -1170,7 +1175,7 @@ def test_map_blocks_da_transformations(func, map_da): lambda x: x.expand_dims(k=[1, 2, 3]), lambda x: x.expand_dims(k=3), lambda x: x.rename({"a": "new1", "b": "new2"}), - # TODO: [lambda x: x.isel(x=1)], + lambda x: x.x, ], ) def test_map_blocks_ds_transformations(func, map_ds): @@ -1180,6 +1185,64 @@ def test_map_blocks_ds_transformations(func, map_ds): assert_identical(actual, func(map_ds)) +@pytest.mark.parametrize("obj", [make_da(), make_ds()]) +def test_map_blocks_da_ds_with_template(obj): + func = lambda x: x.isel(x=[1]) + template = obj.isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, obj, template=template) + assert_identical(actual, template) + + with raise_if_dask_computes(): + actual = obj.map_blocks(func, template=template) + assert_identical(actual, template) + + +def test_map_blocks_template_convert_object(): + da = make_da() + func = lambda x: x.to_dataset().isel(x=[1]) + template = da.to_dataset().isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, da, template=template) + assert_identical(actual, template) + + ds = da.to_dataset() + func = lambda x: x.to_array().isel(x=[1]) + template = ds.to_array().isel(x=[1, 5, 9]) + with raise_if_dask_computes(): + actual = xr.map_blocks(func, ds, template=template) + assert_identical(actual, template) + + +@pytest.mark.parametrize("obj", [make_da(), make_ds()]) +def test_map_blocks_errors_bad_template(obj): + with raises_regex(ValueError, "unexpected coordinate variables"): + xr.map_blocks(lambda x: x.assign_coords(a=10), obj, template=obj).compute() + with raises_regex(ValueError, "does not contain coordinate variables"): + 
xr.map_blocks(lambda x: x.drop_vars("cxy"), obj, template=obj).compute() + with raises_regex(ValueError, "Dimensions {'x'} missing"): + xr.map_blocks(lambda x: x.isel(x=1), obj, template=obj).compute() + with raises_regex(ValueError, "Received dimension 'x' of length 1"): + xr.map_blocks(lambda x: x.isel(x=[1]), obj, template=obj).compute() + with raises_regex(TypeError, "must be a DataArray"): + xr.map_blocks(lambda x: x.isel(x=[1]), obj, template=(obj,)).compute() + with raises_regex(ValueError, "map_blocks requires that one block"): + xr.map_blocks( + lambda x: x.isel(x=[1]).assign_coords(x=10), obj, template=obj.isel(x=[1]) + ).compute() + with raises_regex(ValueError, "Expected index 'x' to be"): + xr.map_blocks( + lambda a: a.isel(x=[1]).assign_coords(x=[120]), # assign bad index values + obj, + template=obj.isel(x=[1, 5, 9]), + ).compute() + + +def test_map_blocks_errors_bad_template_2(map_ds): + with raises_regex(ValueError, "unexpected data variables {'xyz'}"): + xr.map_blocks(lambda x: x.assign(xyz=1), map_ds, template=map_ds).compute() + + @pytest.mark.parametrize("obj", [make_da(), make_ds()]) def test_map_blocks_object_method(obj): def func(obj): From 0e43ba9c7080fc5f1076b0bccec09aeac1a3499b Mon Sep 17 00:00:00 2001 From: Prajjwal Nijhara Date: Thu, 7 May 2020 04:25:39 +0530 Subject: [PATCH 15/71] Use literal syntax instead of function calls to create the data structure (#4038) * Use literal syntax instead of function calls to create the data structure * Update whats-new.rst * Update whats-new.rst --- doc/whats-new.rst | 4 +--- xarray/core/merge.py | 2 +- xarray/util/print_versions.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b22a7217568..3be7c2f45e6 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -142,9 +142,7 @@ Internal Changes - Use ``async`` / ``await`` for the asynchronous distributed tests. (:issue:`3987`, :pull:`3989`) By `Justus Magin `_. -- Remove unnecessary comprehensions becuase the built-in functions like - ``all``, ``any``, ``enumerate``, ``sum``, ``tuple`` etc. can work directly with a - generator expression. (:pull:`4026`) +- Various internal code clean-ups (:pull:`4026`, :pull:`4038`). By `Prajjwal Nijhara `_. .. 
_whats-new.0.15.1: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index fea94246471..35b77d700a0 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -841,7 +841,7 @@ def merge( from .dataarray import DataArray from .dataset import Dataset - dict_like_objects = list() + dict_like_objects = [] for obj in objects: if not isinstance(obj, (DataArray, Dataset, dict)): raise TypeError( diff --git a/xarray/util/print_versions.py b/xarray/util/print_versions.py index 32051bb6843..96983c83aab 100755 --- a/xarray/util/print_versions.py +++ b/xarray/util/print_versions.py @@ -129,7 +129,7 @@ def show_versions(file=sys.stdout): ("sphinx", lambda mod: mod.__version__), ] - deps_blob = list() + deps_blob = [] for (modname, ver_f) in deps: try: if modname in sys.modules: From 69548df9826cde9df6cbdae9c033c9fb1e62d493 Mon Sep 17 00:00:00 2001 From: Keisuke Fujii Date: Fri, 8 May 2020 05:36:31 +0900 Subject: [PATCH 16/71] support darkmode (#4036) * support darkmode but in vscode only * remove unused space * support colab (maybe) and whatsnew --- doc/whats-new.rst | 4 +++- xarray/static/css/style.css | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 3be7c2f45e6..a1d52b28ed5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -73,7 +73,9 @@ New Features Bug fixes ~~~~~~~~~ -- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue`3977`) +- Support dark mode in VS code (:issue:`4024`) + By `Keisuke Fujii `_. +- ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`) By `Huite Bootsma `_. - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`) By `Keisuke Fujii `_. diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index 7e382de3b5b..acfe85d5ac7 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -13,6 +13,18 @@ --xr-background-color-row-odd: var(--jp-layout-color2, #eeeeee); } +html[theme=dark], +body.vscode-dark { + --xr-font-color0: rgba(255, 255, 255, 1); + --xr-font-color2: rgba(255, 255, 255, 0.54); + --xr-font-color3: rgba(255, 255, 255, 0.38); + --xr-border-color: #1F1F1F; + --xr-disabled-color: #515151; + --xr-background-color: #111111; + --xr-background-color-row-even: #111111; + --xr-background-color-row-odd: #313131; +} + .xr-wrap { min-width: 300px; max-width: 700px; From 3e5dd6ef32b9c69806af69a3a5168edcf3b2e21f Mon Sep 17 00:00:00 2001 From: David Brochart Date: Mon, 11 May 2020 16:54:27 +0200 Subject: [PATCH 17/71] Add xarray-leaflet to the visualization projects (#4051) --- doc/related-projects.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/related-projects.rst b/doc/related-projects.rst index 57b8da0c447..b02c4be7338 100644 --- a/doc/related-projects.rst +++ b/doc/related-projects.rst @@ -75,6 +75,7 @@ Visualization - `Datashader `_, `geoviews `_, `holoviews `_, : visualization packages for large data. - `hvplot `_ : A high-level plotting API for the PyData ecosystem built on HoloViews. - `psyplot `_: Interactive data visualization with python. +- `xarray-leaflet `_: An xarray extension for tiles map plotting based on ipyleaflet. Non-Python projects ~~~~~~~~~~~~~~~~~~~ From bd84186acbd84bd386134a5b60111596cee2d8ec Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 12 May 2020 22:47:50 +0000 Subject: [PATCH 18/71] Fix contour when levels is scalar and norm is provided. 
(#3914) Fixes #3735 --- doc/whats-new.rst | 2 ++ xarray/plot/utils.py | 2 +- xarray/tests/test_plot.py | 9 +++++---- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a1d52b28ed5..0724460b1e5 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -91,6 +91,8 @@ Bug fixes By `Deepak Cherian `_ - Fix :py:class:`~xarray.plot.FacetGrid` when ``vmin == vmax``. (:issue:`3734`) By `Deepak Cherian `_ +- Fix plotting when ``levels`` is a scalar and ``norm`` is provided. (:issue:`3735`) + By `Deepak Cherian `_ - Fix bug where plotting line plots with 2D coordinates depended on dimension order. (:issue:`3933`) By `Tom Nicholas `_. diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index c3512828888..cb993c192d9 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -268,7 +268,7 @@ def _determine_cmap_params( cmap = OPTIONS["cmap_sequential"] # Handle discrete levels - if levels is not None and norm is None: + if levels is not None: if is_scalar(levels): if user_minmax: levels = np.linspace(vmin, vmax, levels) diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index bf1f9ed60bb..af7c686bf60 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -854,21 +854,22 @@ def test_norm_sets_vmin_vmax(self): vmin = self.data.min() vmax = self.data.max() - for norm, extend in zip( + for norm, extend, levels in zip( [ + mpl.colors.Normalize(), mpl.colors.Normalize(), mpl.colors.Normalize(vmin + 0.1, vmax - 0.1), mpl.colors.Normalize(None, vmax - 0.1), mpl.colors.Normalize(vmin + 0.1, None), ], - ["neither", "both", "max", "min"], + ["neither", "neither", "both", "max", "min"], + [7, None, None, None, None], ): test_min = vmin if norm.vmin is None else norm.vmin test_max = vmax if norm.vmax is None else norm.vmax - cmap_params = _determine_cmap_params(self.data, norm=norm) - + cmap_params = _determine_cmap_params(self.data, norm=norm, levels=levels) assert cmap_params["vmin"] == test_min assert cmap_params["vmax"] == test_max assert cmap_params["extend"] == extend From c73e9589da06730848a876b1c277bf1ad389372f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 13 May 2020 17:51:43 +0200 Subject: [PATCH 19/71] FIX: correct dask array handling in _calc_idxminmax (#3922) * FIX: correct dask array handling in _calc_idxminmax * FIX: remove unneeded import, reformat via black * fix idxmax, idxmin with dask arrays * FIX: use array[dim].data in `_calc_idxminmax` as per @keewis suggestion, attach dim name to result * ADD: add dask tests to `idxmin`/`idxmax` dataarray tests * FIX: add back fixture line removed by accident * ADD: complete dask handling in `idxmin`/`idxmax` tests in test_dataarray, xfail dask tests for dtype dateime64 (M) * ADD: add "support dask handling for idxmin/idxmax" in whats-new.rst * MIN: reintroduce changes added by #3953 * MIN: change if-clause to use `and` instead of `&` as per review-comment * MIN: change if-clause to use `and` instead of `&` as per review-comment * WIP: remove dask handling entirely for debugging purposes * Test for dask computes * WIP: re-add dask handling (map_blocks-approach), add `with raise_if_dask_computes()` context to idxmin-tests * Use dask indexing instead of map_blocks. * Better chunk choice. * Return -1 for _nan_argminmax_object if all NaNs along dim * Revert "Return -1 for _nan_argminmax_object if all NaNs along dim" This reverts commit 58901b9da821a04f2ec085577cb916c4d67f6f50. 
* Raise error for object arrays * No error for object arrays. Instead expect 1 compute in tests. Co-authored-by: dcherian --- doc/whats-new.rst | 3 + xarray/core/computation.py | 23 +++---- xarray/tests/test_dataarray.py | 120 ++++++++++++++++++++++++++------- 3 files changed, 110 insertions(+), 36 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0724460b1e5..cd30fab0160 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -53,6 +53,9 @@ New Features - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:issue:`60`, :pull:`3871`) By `Todd Jennings `_ +- Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`, + :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`) + By `Kai Mühlbauer `_. - More support for unit aware arrays with pint (:pull:`3643`) By `Justus Magin `_. - Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even diff --git a/xarray/core/computation.py b/xarray/core/computation.py index a3723ea9db9..28bf818e4a3 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -26,7 +26,6 @@ from . import dtypes, duck_array_ops, utils from .alignment import deep_align from .merge import merge_coordinates_without_align -from .nanops import dask_array from .options import OPTIONS from .pycompat import dask_array_type from .utils import is_dict_like @@ -1380,24 +1379,24 @@ def _calc_idxminmax( # This will run argmin or argmax. indx = func(array, dim=dim, axis=None, keep_attrs=keep_attrs, skipna=skipna) - # Get the coordinate we want. - coordarray = array[dim] - # Handle dask arrays. - if isinstance(array, dask_array_type): - res = dask_array.map_blocks(coordarray, indx, dtype=indx.dtype) + if isinstance(array.data, dask_array_type): + import dask.array + + chunks = dict(zip(array.dims, array.chunks)) + dask_coord = dask.array.from_array(array[dim].data, chunks=chunks[dim]) + res = indx.copy(data=dask_coord[(indx.data,)]) + # we need to attach back the dim name + res.name = dim else: - res = coordarray[ - indx, - ] + res = array[dim][(indx,)] + # The dim is gone but we need to remove the corresponding coordinate. + del res.coords[dim] if skipna or (skipna is None and array.dtype.kind in na_dtypes): # Put the NaN values back in after removing them res = res.where(~allna, fill_value) - # The dim is gone but we need to remove the corresponding coordinate. 
- del res.coords[dim] - # Copy attributes from argmin/argmax, if any res.attrs = indx.attrs diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 6984d5361d2..a01234616a4 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -34,6 +34,8 @@ source_ndarray, ) +from .test_dask import raise_if_dask_computes + class TestDataArray: @pytest.fixture(autouse=True) @@ -4524,11 +4526,21 @@ def test_argmax(self, x, minindex, maxindex, nanindex): assert_identical(result2, expected2) - def test_idxmin(self, x, minindex, maxindex, nanindex): - ar0 = xr.DataArray( + @pytest.mark.parametrize("use_dask", [True, False]) + def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + if use_dask and x.dtype.kind == "M": + pytest.xfail("dask operation 'argmin' breaks when dtype is datetime64 (M)") + ar0_raw = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) + if use_dask: + ar0 = ar0_raw.chunk({}) + else: + ar0 = ar0_raw + # dim doesn't exist with pytest.raises(KeyError): ar0.idxmin(dim="spam") @@ -4620,11 +4632,21 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): result7 = ar0.idxmin(fill_value=-1j) assert_identical(result7, expected7) - def test_idxmax(self, x, minindex, maxindex, nanindex): - ar0 = xr.DataArray( + @pytest.mark.parametrize("use_dask", [True, False]) + def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + if use_dask and x.dtype.kind == "M": + pytest.xfail("dask operation 'argmax' breaks when dtype is datetime64 (M)") + ar0_raw = xr.DataArray( x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs ) + if use_dask: + ar0 = ar0_raw.chunk({}) + else: + ar0 = ar0_raw + # dim doesn't exist with pytest.raises(KeyError): ar0.idxmax(dim="spam") @@ -4944,14 +4966,31 @@ def test_argmax(self, x, minindex, maxindex, nanindex): assert_identical(result3, expected2) - def test_idxmin(self, x, minindex, maxindex, nanindex): - ar0 = xr.DataArray( + @pytest.mark.parametrize("use_dask", [True, False]) + def test_idxmin(self, x, minindex, maxindex, nanindex, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + if use_dask and x.dtype.kind == "M": + pytest.xfail("dask operation 'argmin' breaks when dtype is datetime64 (M)") + + if x.dtype.kind == "O": + # TODO: nanops._nan_argminmax_object computes once to check for all-NaN slices. 
+ max_computes = 1 + else: + max_computes = 0 + + ar0_raw = xr.DataArray( x, dims=["y", "x"], coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, attrs=self.attrs, ) + if use_dask: + ar0 = ar0_raw.chunk({}) + else: + ar0 = ar0_raw + assert_identical(ar0, ar0) # No dimension specified @@ -4982,15 +5021,18 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected0.name = "x" # Default fill value (NaN) - result0 = ar0.idxmin(dim="x") + with raise_if_dask_computes(max_computes=max_computes): + result0 = ar0.idxmin(dim="x") assert_identical(result0, expected0) # Manually specify NaN fill_value - result1 = ar0.idxmin(dim="x", fill_value=np.NaN) + with raise_if_dask_computes(max_computes=max_computes): + result1 = ar0.idxmin(dim="x", fill_value=np.NaN) assert_identical(result1, expected0) # keep_attrs - result2 = ar0.idxmin(dim="x", keep_attrs=True) + with raise_if_dask_computes(max_computes=max_computes): + result2 = ar0.idxmin(dim="x", keep_attrs=True) expected2 = expected0.copy() expected2.attrs = self.attrs assert_identical(result2, expected2) @@ -5008,11 +5050,13 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected3.name = "x" expected3.attrs = {} - result3 = ar0.idxmin(dim="x", skipna=False) + with raise_if_dask_computes(max_computes=max_computes): + result3 = ar0.idxmin(dim="x", skipna=False) assert_identical(result3, expected3) # fill_value should be ignored with skipna=False - result4 = ar0.idxmin(dim="x", skipna=False, fill_value=-100j) + with raise_if_dask_computes(max_computes=max_computes): + result4 = ar0.idxmin(dim="x", skipna=False, fill_value=-100j) assert_identical(result4, expected3) # Float fill_value @@ -5024,7 +5068,8 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected5 = xr.concat(expected5, dim="y") expected5.name = "x" - result5 = ar0.idxmin(dim="x", fill_value=-1.1) + with raise_if_dask_computes(max_computes=max_computes): + result5 = ar0.idxmin(dim="x", fill_value=-1.1) assert_identical(result5, expected5) # Integer fill_value @@ -5036,7 +5081,8 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected6 = xr.concat(expected6, dim="y") expected6.name = "x" - result6 = ar0.idxmin(dim="x", fill_value=-1) + with raise_if_dask_computes(max_computes=max_computes): + result6 = ar0.idxmin(dim="x", fill_value=-1) assert_identical(result6, expected6) # Complex fill_value @@ -5048,17 +5094,35 @@ def test_idxmin(self, x, minindex, maxindex, nanindex): expected7 = xr.concat(expected7, dim="y") expected7.name = "x" - result7 = ar0.idxmin(dim="x", fill_value=-5j) + with raise_if_dask_computes(max_computes=max_computes): + result7 = ar0.idxmin(dim="x", fill_value=-5j) assert_identical(result7, expected7) - def test_idxmax(self, x, minindex, maxindex, nanindex): - ar0 = xr.DataArray( + @pytest.mark.parametrize("use_dask", [True, False]) + def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask): + if use_dask and not has_dask: + pytest.skip("requires dask") + if use_dask and x.dtype.kind == "M": + pytest.xfail("dask operation 'argmax' breaks when dtype is datetime64 (M)") + + if x.dtype.kind == "O": + # TODO: nanops._nan_argminmax_object computes once to check for all-NaN slices. 
+ max_computes = 1 + else: + max_computes = 0 + + ar0_raw = xr.DataArray( x, dims=["y", "x"], coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])}, attrs=self.attrs, ) + if use_dask: + ar0 = ar0_raw.chunk({}) + else: + ar0 = ar0_raw + # No dimension specified with pytest.raises(ValueError): ar0.idxmax() @@ -5090,15 +5154,18 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected0.name = "x" # Default fill value (NaN) - result0 = ar0.idxmax(dim="x") + with raise_if_dask_computes(max_computes=max_computes): + result0 = ar0.idxmax(dim="x") assert_identical(result0, expected0) # Manually specify NaN fill_value - result1 = ar0.idxmax(dim="x", fill_value=np.NaN) + with raise_if_dask_computes(max_computes=max_computes): + result1 = ar0.idxmax(dim="x", fill_value=np.NaN) assert_identical(result1, expected0) # keep_attrs - result2 = ar0.idxmax(dim="x", keep_attrs=True) + with raise_if_dask_computes(max_computes=max_computes): + result2 = ar0.idxmax(dim="x", keep_attrs=True) expected2 = expected0.copy() expected2.attrs = self.attrs assert_identical(result2, expected2) @@ -5116,11 +5183,13 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected3.name = "x" expected3.attrs = {} - result3 = ar0.idxmax(dim="x", skipna=False) + with raise_if_dask_computes(max_computes=max_computes): + result3 = ar0.idxmax(dim="x", skipna=False) assert_identical(result3, expected3) # fill_value should be ignored with skipna=False - result4 = ar0.idxmax(dim="x", skipna=False, fill_value=-100j) + with raise_if_dask_computes(max_computes=max_computes): + result4 = ar0.idxmax(dim="x", skipna=False, fill_value=-100j) assert_identical(result4, expected3) # Float fill_value @@ -5132,7 +5201,8 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected5 = xr.concat(expected5, dim="y") expected5.name = "x" - result5 = ar0.idxmax(dim="x", fill_value=-1.1) + with raise_if_dask_computes(max_computes=max_computes): + result5 = ar0.idxmax(dim="x", fill_value=-1.1) assert_identical(result5, expected5) # Integer fill_value @@ -5144,7 +5214,8 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected6 = xr.concat(expected6, dim="y") expected6.name = "x" - result6 = ar0.idxmax(dim="x", fill_value=-1) + with raise_if_dask_computes(max_computes=max_computes): + result6 = ar0.idxmax(dim="x", fill_value=-1) assert_identical(result6, expected6) # Complex fill_value @@ -5156,7 +5227,8 @@ def test_idxmax(self, x, minindex, maxindex, nanindex): expected7 = xr.concat(expected7, dim="y") expected7.name = "x" - result7 = ar0.idxmax(dim="x", fill_value=-5j) + with raise_if_dask_computes(max_computes=max_computes): + result7 = ar0.idxmax(dim="x", fill_value=-5j) assert_identical(result7, expected7) From 8051c47686ae23062f9008785563d62327735002 Mon Sep 17 00:00:00 2001 From: keewis Date: Wed, 13 May 2020 19:35:45 +0200 Subject: [PATCH 20/71] fix the failing flake8 CI (#4057) * rename d and l to dim and length --- xarray/backends/common.py | 2 +- xarray/backends/memory.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index fa3ee19f542..63c4c956f86 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -241,7 +241,7 @@ def encode_attribute(self, a): """encode one attribute""" return a - def set_dimension(self, d, l): # pragma: no cover + def set_dimension(self, dim, length): # pragma: no cover raise NotImplementedError() def set_attribute(self, k, v): # pragma: no cover diff --git 
a/xarray/backends/memory.py b/xarray/backends/memory.py index bee6521bce2..17095d09651 100644 --- a/xarray/backends/memory.py +++ b/xarray/backends/memory.py @@ -40,6 +40,6 @@ def set_attribute(self, k, v): # copy to imitate writing to disk. self._attributes[k] = copy.deepcopy(v) - def set_dimension(self, d, l, unlimited_dims=None): + def set_dimension(self, dim, length, unlimited_dims=None): # in this model, dimensions are accounted for in the variables pass From 2542a63f6ebed1a464af7fc74b9f3bf302925803 Mon Sep 17 00:00:00 2001 From: clausmichele <31700619+clausmichele@users.noreply.github.com> Date: Thu, 14 May 2020 14:28:54 +0000 Subject: [PATCH 21/71] Fixed typo in rasterio docs (#4063) --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 738d8d2b7ab..27e182906a4 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -804,7 +804,7 @@ GDAL readable raster data using `rasterio`_ as well as for exporting to a geoTIF In [4]: rds.rio.crs Out[4]: CRS.from_epsg(32618) - In [5]: rds4326 = rio.rio.reproject("epsg:4326") + In [5]: rds4326 = rds.rio.reproject("epsg:4326") In [6]: rds4326.rio.crs Out[6]: CRS.from_epsg(4326) From 742d00076c8e79cb753b4b4856dbbef5f52878c6 Mon Sep 17 00:00:00 2001 From: aurghs <35919497+aurghs@users.noreply.github.com> Date: Tue, 19 May 2020 17:43:53 +0200 Subject: [PATCH 22/71] #1621 optional decode timedelta (#4071) * add decode_timedelta kwarg in decode_cf and open_* functions and test. * Fix style issue * Add chang author reference * removed check decode_timedelta in open_dataset * fix docstring indentation * fix: force dtype in test decode_timedelta --- doc/whats-new.rst | 6 ++++++ xarray/backends/api.py | 16 +++++++++++++++ xarray/backends/zarr.py | 8 ++++++++ xarray/conventions.py | 21 ++++++++++++++----- xarray/tests/test_conventions.py | 35 ++++++++++++++++++++++++++++++++ 5 files changed, 81 insertions(+), 5 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cd30fab0160..f5b6f2f3e86 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -74,6 +74,12 @@ New Features where the result of a computation could not be inferred automatically. By `Deepak Cherian `_ +- Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`, + (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`, + :py:func:`xarray.decode_cf`) that allows to disable/enable the decoding of timedeltas + independently of time decoding (:issue:`1621`) + `Aureliana Barghini ` + Bug fixes ~~~~~~~~~ - Support dark mode in VS code (:issue:`4024`) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 184aad579a2..0919d2a582b 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -303,6 +303,7 @@ def open_dataset( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open and decode a dataset from a file or file-like object. @@ -383,6 +384,11 @@ def open_dataset( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. 
Returns ------- @@ -435,6 +441,7 @@ def open_dataset( decode_times = False concat_characters = False decode_coords = False + decode_timedelta = False if cache is None: cache = chunks is None @@ -451,6 +458,7 @@ def maybe_decode_store(store, lock=False): decode_coords=decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) _protect_dataset_variables_inplace(ds, cache) @@ -477,6 +485,7 @@ def maybe_decode_store(store, lock=False): chunks, drop_variables, use_cftime, + decode_timedelta, ) name_prefix = "open_dataset-%s" % token ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token) @@ -561,6 +570,7 @@ def open_dataarray( drop_variables=None, backend_kwargs=None, use_cftime=None, + decode_timedelta=None, ): """Open an DataArray from a file or file-like object containing a single data variable. @@ -640,6 +650,11 @@ def open_dataarray( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Notes ----- @@ -671,6 +686,7 @@ def open_dataarray( drop_variables=drop_variables, backend_kwargs=backend_kwargs, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if len(dataset.data_vars) != 1: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index de6b627447e..540759a1c4c 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -503,6 +503,7 @@ def open_zarr( drop_variables=None, consolidated=False, overwrite_encoded_chunks=False, + decode_timedelta=None, **kwargs, ): """Load and decode a dataset from a Zarr store. @@ -562,6 +563,11 @@ def open_zarr( consolidated : bool, optional Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. Returns ------- @@ -612,6 +618,7 @@ def open_zarr( decode_times = False concat_characters = False decode_coords = False + decode_timedelta = False def maybe_decode_store(store, lock=False): ds = conventions.decode_cf( @@ -621,6 +628,7 @@ def maybe_decode_store(store, lock=False): concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables, + decode_timedelta=decode_timedelta, ) # TODO: this is where we would apply caching diff --git a/xarray/conventions.py b/xarray/conventions.py index df24d0d3d8d..588fcea71a3 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -266,6 +266,7 @@ def decode_cf_variable( decode_endianness=True, stack_char_dim=True, use_cftime=None, + decode_timedelta=None, ): """ Decodes a variable which may hold CF encoded information. 
@@ -315,6 +316,9 @@ def decode_cf_variable( var = as_variable(var) original_dtype = var.dtype + if decode_timedelta is None: + decode_timedelta = decode_times + if concat_characters: if stack_char_dim: var = strings.CharacterArrayCoder().decode(var, name=name) @@ -328,12 +332,10 @@ def decode_cf_variable( ]: var = coder.decode(var, name=name) + if decode_timedelta: + var = times.CFTimedeltaCoder().decode(var, name=name) if decode_times: - for coder in [ - times.CFTimedeltaCoder(), - times.CFDatetimeCoder(use_cftime=use_cftime), - ]: - var = coder.decode(var, name=name) + var = times.CFDatetimeCoder(use_cftime=use_cftime).decode(var, name=name) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) # TODO(shoyer): convert everything below to use coders @@ -442,6 +444,7 @@ def decode_cf_variables( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """ Decode several CF encoded variables. @@ -492,6 +495,7 @@ def stackable(dim): decode_times=decode_times, stack_char_dim=stack_char_dim, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) if decode_coords: var_attrs = new_vars[k].attrs @@ -518,6 +522,7 @@ def decode_cf( decode_coords=True, drop_variables=None, use_cftime=None, + decode_timedelta=None, ): """Decode the given Dataset or Datastore according to CF conventions into a new Dataset. @@ -552,6 +557,11 @@ def decode_cf( represented using ``np.datetime64[ns]`` objects. If False, always decode times to ``np.datetime64[ns]`` objects; if this is not possible raise an error. + decode_timedelta : bool, optional + If True, decode variables and coordinates with time units in + {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'} + into timedelta objects. If False, leave them encoded as numbers. + If None (default), assume the same value of decode_time. 
Returns ------- @@ -583,6 +593,7 @@ def decode_cf( decode_coords, drop_variables=drop_variables, use_cftime=use_cftime, + decode_timedelta=decode_timedelta, ) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars)) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index acb2400ea04..dfd20a303ff 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -311,6 +311,41 @@ def test_decode_dask_times(self): conventions.decode_cf(original).chunk(), ) + def test_decode_cf_time_kwargs(self): + ds = Dataset.from_dict( + { + "coords": { + "timedelta": { + "data": np.array([1, 2, 3], dtype="int64"), + "dims": "timedelta", + "attrs": {"units": "days"}, + }, + "time": { + "data": np.array([1, 2, 3], dtype="int64"), + "dims": "time", + "attrs": {"units": "days since 2000-01-01"}, + }, + }, + "dims": {"time": 3, "timedelta": 3}, + "data_vars": { + "a": {"dims": ("time", "timedelta"), "data": np.ones((3, 3))}, + }, + } + ) + + dsc = conventions.decode_cf(ds) + assert dsc.timedelta.dtype == np.dtype("m8[ns]") + assert dsc.time.dtype == np.dtype("M8[ns]") + dsc = conventions.decode_cf(ds, decode_times=False) + assert dsc.timedelta.dtype == np.dtype("int64") + assert dsc.time.dtype == np.dtype("int64") + dsc = conventions.decode_cf(ds, decode_times=True, decode_timedelta=False) + assert dsc.timedelta.dtype == np.dtype("int64") + assert dsc.time.dtype == np.dtype("M8[ns]") + dsc = conventions.decode_cf(ds, decode_times=False, decode_timedelta=True) + assert dsc.timedelta.dtype == np.dtype("m8[ns]") + assert dsc.time.dtype == np.dtype("int64") + class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): def encode_variable(self, var): From f38b0c15aa90c6812118047ae637ee67048db51f Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 19 May 2020 18:49:25 +0200 Subject: [PATCH 23/71] remove the backslash escapes and typehint fragments in the API docs (#4070) * remove numpydoc which is the reason for the backslash-escaped stars * don't install numpydoc --- ci/requirements/doc.yml | 3 +-- doc/conf.py | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/ci/requirements/doc.yml b/ci/requirements/doc.yml index 2987303c92a..6caebc46cdf 100644 --- a/ci/requirements/doc.yml +++ b/ci/requirements/doc.yml @@ -17,11 +17,10 @@ dependencies: - netcdf4>=1.5 - numba - numpy>=1.17 - - numpydoc - pandas>=1.0 - rasterio>=1.1 - seaborn - setuptools - sphinx>=2.3 - sphinx_rtd_theme>=0.4 - - zarr>=2.4 \ No newline at end of file + - zarr>=2.4 diff --git a/doc/conf.py b/doc/conf.py index 578f9cf550d..5d304dab362 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -79,7 +79,6 @@ "sphinx.ext.extlinks", "sphinx.ext.mathjax", "sphinx.ext.napoleon", - "numpydoc", "IPython.sphinxext.ipython_directive", "IPython.sphinxext.ipython_console_highlighting", "nbsphinx", From 261df2e56b2d554927887b8943f84514fc60369b Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Wed, 20 May 2020 13:04:01 -0400 Subject: [PATCH 24/71] Document Xarray zarr encoding conventions (#4047) * document zarr encoding * link to zarr spec * fix typo [ci skip] --- doc/internals.rst | 50 +++++++++++++++++++++++++++++++++++++++++++++++ doc/io.rst | 6 ++++-- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/doc/internals.rst b/doc/internals.rst index c23aab8c5d7..27c7c4e1d87 100644 --- a/doc/internals.rst +++ b/doc/internals.rst @@ -138,3 +138,53 @@ To help users keep things straight, please `let us know `_ if you plan to write a new accessor 
for an open source library. In the future, we will maintain a list of accessors and the libraries that implement them on this page. + +.. _zarr_encoding: + +Zarr Encoding Specification +--------------------------- + +In implementing support for the `Zarr `_ storage +format, Xarray developers made some *ad hoc* choices about how to store +NetCDF data in Zarr. +Future versions of the Zarr spec will likely include a more formal convention +for the storage of the NetCDF data model in Zarr; see +`Zarr spec repo `_ for ongoing +discussion. + +First, Xarray can only read and write Zarr groups. There is currently no support +for reading / writting individual Zarr arrays. Zarr groups are mapped to +Xarray ``Dataset`` objects. + +Second, from Xarray's point of view, the key difference between +NetCDF and Zarr is that all NetCDF arrays have *dimension names* while Zarr +arrays do not. Therefore, in order to store NetCDF data in Zarr, Xarray must +somehow encode and decode the name of each array's dimensions. + +To accomplish this, Xarray developers decided to define a special Zarr array +attribute: ``_ARRAY_DIMENSIONS``. The value of this attribute is a list of +dimension names (strings), for example ``["time", "lon", "lat"]``. When writing +data to Zarr, Xarray sets this attribute on all variables based on the variable +dimensions. When reading a Zarr group, Xarray looks for this attribute on all +arrays, raising an error if it can't be found. The attribute is used to define +the variable dimension names and then removed from the attributes dictionary +returned to the user. + +Because of these choices, Xarray cannot read arbitrary array data, but only +Zarr data with valid ``_ARRAY_DIMENSIONS`` attributes on each array. + +After decoding the ``_ARRAY_DIMENSIONS`` attribute and assigning the variable +dimensions, Xarray proceeds to [optionally] decode each variable using its +standard CF decoding machinery used for NetCDF data (see :py:func:`decode_cf`). + +As a concrete example, here we write a tutorial dataset to Zarr and then +re-open it directly with Zarr: + +.. ipython:: python + + ds = xr.tutorial.load_dataset('rasm') + ds.to_zarr('rasm.zarr', mode='w') + import zarr + zgroup = zarr.open('rasm.zarr') + print(zgroup.tree()) + dict(zgroup['Tair'].attrs) diff --git a/doc/io.rst b/doc/io.rst index 27e182906a4..1f854586202 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -463,7 +463,7 @@ This is not CF-compliant but again facilitates roundtripping of xarray datasets. Invalid netCDF files ~~~~~~~~~~~~~~~~~~~~ -The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't +The library ``h5netcdf`` allows writing some dtypes (booleans, complex, ...) that aren't allowed in netCDF4 (see `h5netcdf documentation `_). This feature is availabe through :py:meth:`DataArray.to_netcdf` and @@ -837,7 +837,9 @@ Xarray's Zarr backend allows xarray to leverage these capabilities. Xarray can't open just any zarr dataset, because xarray requires special metadata (attributes) describing the dataset dimensions and coordinates. At this time, xarray can only open zarr datasets that have been written by -xarray. To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. +xarray. For implementation details, see :ref:`zarr_encoding`. + +To write a dataset with zarr, we use the :py:attr:`Dataset.to_zarr` method. To write to a local directory, we pass a path to a directory .. 
ipython:: python From cb90d5542bd6868d5548ae8efb5815c249c2c329 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 20 May 2020 19:06:39 +0200 Subject: [PATCH 25/71] Fix html repr in untrusted notebooks (plain text fallback) (#4053) * add html pre element with text repr as fallback The PRE element is not displayed when CSS is injected. When CSS is not injected (untrusted notebook), the PRE element is shown but not the DIV container used for the HTML repr. * remove title elements in svg icons Prevent showing those when fallback to plain text repr. A title tag is already present in the HTML label elements. * add basic test * update what's new --- doc/whats-new.rst | 2 ++ xarray/core/formatting_html.py | 14 ++++++++++---- xarray/static/css/style.css | 6 ++++++ xarray/static/html/icons-svg-inline.html | 2 -- xarray/tests/test_formatting_html.py | 7 +++++++ 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f5b6f2f3e86..dab48383c55 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -116,6 +116,8 @@ Bug fixes - Fix bug in time parsing failing to fall back to cftime. This was causing time variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`) By `Ryan May `_. +- Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`) + By `Benoit Bovy `_. Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py index 6e345582ed0..69832d6ca3d 100644 --- a/xarray/core/formatting_html.py +++ b/xarray/core/formatting_html.py @@ -222,14 +222,20 @@ def array_section(obj): ) -def _obj_repr(header_components, sections): +def _obj_repr(obj, header_components, sections): + """Return HTML repr of an xarray object. + + If CSS is not injected (untrusted notebook), fallback to the plain text repr. + + """ header = f"
{''.join(h for h in header_components)}
" sections = "".join(f"
  • {s}
  • " for s in sections) return ( "
    " f"{ICONS_SVG}" - "
    " + f"
    {escape(repr(obj))}
    " + "" @@ -257,7 +263,7 @@ def array_repr(arr): sections.append(attr_section(arr.attrs)) - return _obj_repr(header_components, sections) + return _obj_repr(arr, header_components, sections) def dataset_repr(ds): @@ -272,4 +278,4 @@ def dataset_repr(ds): attr_section(ds.attrs), ] - return _obj_repr(header_components, sections) + return _obj_repr(ds, header_components, sections) diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css index acfe85d5ac7..39cd6d6755f 100644 --- a/xarray/static/css/style.css +++ b/xarray/static/css/style.css @@ -26,10 +26,16 @@ body.vscode-dark { } .xr-wrap { + display: block; min-width: 300px; max-width: 700px; } +.xr-text-repr-fallback { + /* fallback to plain text repr when CSS is not injected (untrusted notebook) */ + display: none; +} + .xr-header { padding-top: 6px; padding-bottom: 6px; diff --git a/xarray/static/html/icons-svg-inline.html b/xarray/static/html/icons-svg-inline.html index c44f89c4304..b0e837a26cd 100644 --- a/xarray/static/html/icons-svg-inline.html +++ b/xarray/static/html/icons-svg-inline.html @@ -1,13 +1,11 @@ -Show/Hide data repr -Show/Hide attributes diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py index 94653016416..90e74f1f78f 100644 --- a/xarray/tests/test_formatting_html.py +++ b/xarray/tests/test_formatting_html.py @@ -139,6 +139,13 @@ def test_repr_of_dataset(dataset): assert "<IA>" in formatted +def test_repr_text_fallback(dataset): + formatted = fh.dataset_repr(dataset) + + # Just test that the "pre" block used for fallback to plain text is present. + assert "
    " in formatted
    +
    +
     def test_variable_repr_html():
         v = xr.Variable(["time", "x"], [[1, 2, 3], [4, 5, 6]], {"foo": "bar"})
         assert hasattr(v, "_repr_html_")
    
    From 5c04ebfde12a70913d28ffa70e9e13c0b992dfa9 Mon Sep 17 00:00:00 2001
    From: Yunus Sevinchan 
    Date: Wed, 20 May 2020 19:08:23 +0200
    Subject: [PATCH 26/71] Add NetCDF3 dtype coercion for unsigned integer types
     (#4018)
    
    * In netcdf3 backend, also coerce unsigned integer dtypes
    
    * Adjust test for netcdf3 rountrip to include coercion
    
    This might be a bit too general for what is required at this point,
    though ... :thinking:
    
    * Add test for failing dtype coercion
    
    * Add What's New entry for issue #4014 and PR #4018
    
    * Move netcdf3-specific test to NetCDF3Only class
    
    Also uses a class variable for definition of netcdf3 formats now.
    
    Co-authored-by: Deepak Cherian 
    ---
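Notes (illustrative, not part of the diff below): a minimal sketch of the coercion
logic this patch extends. The helper name coerce_nc3_dtype_sketch is made up for
illustration and only numpy is assumed; the mapping and the equality check mirror
the lines added in xarray/backends/netcdf3.py.

import numpy as np

_nc3_dtype_coercions = {
    "int64": "int32",
    "uint64": "int32",
    "uint32": "int32",
    "uint16": "int16",
    "uint8": "int8",
    "bool": "int8",
}

def coerce_nc3_dtype_sketch(arr):
    # cast to the netCDF3-compatible dtype, then verify the values survived the cast
    dtype = str(arr.dtype)
    if dtype in _nc3_dtype_coercions:
        new_dtype = _nc3_dtype_coercions[dtype]
        cast_arr = arr.astype(new_dtype)
        if not (cast_arr == arr).all():
            raise ValueError(
                f"could not safely cast array from dtype {dtype} to {new_dtype}"
            )
        arr = cast_arr
    return arr

coerce_nc3_dtype_sketch(np.array([0, 1, 2, 127], dtype="uint8"))  # fine, values fit in int8
coerce_nc3_dtype_sketch(np.array([0, 1, 2, 255], dtype="uint8"))  # raises "could not safely cast"

The uint8 case fits into int8 only while values stay below 128, which is exactly
the failure mode the new test exercises.
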
     doc/whats-new.rst             |  3 +++
     xarray/backends/netcdf3.py    | 26 ++++++++++++++++++-------
     xarray/tests/test_backends.py | 36 ++++++++++++++++++++++++++++++-----
     3 files changed, 53 insertions(+), 12 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index dab48383c55..447aaf5b0bf 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -70,6 +70,9 @@ New Features
       the :py:class:`~core.accessor_dt.DatetimeAccessor` (:pull:`3935`).  This
       feature requires cftime version 1.1.0 or greater.  By
       `Spencer Clark `_.
    +- For the netCDF3 backend, added dtype coercions for unsigned integer types.
    +  (:issue:`4014`, :pull:`4018`)
    +  By `Yunus Sevinchan `_
     - :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases
       where the result of a computation could not be inferred automatically.
       By `Deepak Cherian `_
    diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py
    index c9c4baf9b01..51d7fce22a0 100644
    --- a/xarray/backends/netcdf3.py
    +++ b/xarray/backends/netcdf3.py
    @@ -28,7 +28,14 @@
     
     # These data-types aren't supported by netCDF3, so they are automatically
     # coerced instead as indicated by the "coerce_nc3_dtype" function
    -_nc3_dtype_coercions = {"int64": "int32", "bool": "int8"}
    +_nc3_dtype_coercions = {
    +    "int64": "int32",
    +    "uint64": "int32",
    +    "uint32": "int32",
    +    "uint16": "int16",
    +    "uint8": "int8",
    +    "bool": "int8",
    +}
     
     # encode all strings as UTF-8
     STRING_ENCODING = "utf-8"
    @@ -37,12 +44,17 @@
     def coerce_nc3_dtype(arr):
         """Coerce an array to a data type that can be stored in a netCDF-3 file
     
    -    This function performs the following dtype conversions:
    -        int64 -> int32
    -        bool -> int8
    -
    -    Data is checked for equality, or equivalence (non-NaN values) with
    -    `np.allclose` with the default keyword arguments.
    +    This function performs the dtype conversions as specified by the
    +    ``_nc3_dtype_coercions`` mapping:
    +        int64  -> int32
    +        uint64 -> int32
    +        uint32 -> int32
    +        uint16 -> int16
    +        uint8  -> int8
    +        bool   -> int8
    +
+    Data is checked for equality, or equivalence (non-NaN values), using
    +    ``(cast_array == original_array).all()``.
         """
         dtype = str(arr.dtype)
         if dtype in _nc3_dtype_coercions:
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 90deea51d2a..280b8bff63d 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -30,6 +30,7 @@
         save_mfdataset,
     )
     from xarray.backends.common import robust_getitem
    +from xarray.backends.netcdf3 import _nc3_dtype_coercions
     from xarray.backends.netCDF4_ import _extract_nc4_variable_encoding
     from xarray.backends.pydap_ import PydapDataStore
     from xarray.coding.variables import SerializationWarning
    @@ -227,7 +228,27 @@ def __getitem__(self, key):
     
     
     class NetCDF3Only:
    -    pass
    +    netcdf3_formats = ("NETCDF3_CLASSIC", "NETCDF3_64BIT")
    +
    +    @requires_scipy
    +    def test_dtype_coercion_error(self):
    +        """Failing dtype coercion should lead to an error"""
    +        for dtype, format in itertools.product(
    +            _nc3_dtype_coercions, self.netcdf3_formats
    +        ):
    +            if dtype == "bool":
    +                # coerced upcast (bool to int8) ==> can never fail
    +                continue
    +
    +            # Using the largest representable value, create some data that will
    +            # no longer compare equal after the coerced downcast
    +            maxval = np.iinfo(dtype).max
    +            x = np.array([0, 1, 2, maxval], dtype=dtype)
    +            ds = Dataset({"x": ("t", x, {})})
    +
    +            with create_tmp_file(allow_cleanup_failure=False) as path:
    +                with pytest.raises(ValueError, match="could not safely cast"):
    +                    ds.to_netcdf(path, format=format)
     
     
     class DatasetIOBase:
    @@ -296,9 +317,14 @@ def test_write_store(self):
         def check_dtypes_roundtripped(self, expected, actual):
             for k in expected.variables:
                 expected_dtype = expected.variables[k].dtype
    -            if isinstance(self, NetCDF3Only) and expected_dtype == "int64":
    -                # downcast
    -                expected_dtype = np.dtype("int32")
    +
    +            # For NetCDF3, the backend should perform dtype coercion
    +            if (
    +                isinstance(self, NetCDF3Only)
    +                and str(expected_dtype) in _nc3_dtype_coercions
    +            ):
    +                expected_dtype = np.dtype(_nc3_dtype_coercions[str(expected_dtype)])
    +
                 actual_dtype = actual.variables[k].dtype
                 # TODO: check expected behavior for string dtypes more carefully
                 string_kinds = {"O", "S", "U"}
    @@ -2156,7 +2182,7 @@ def test_cross_engine_read_write_netcdf3(self):
                 valid_engines.add("scipy")
     
             for write_engine in valid_engines:
    -            for format in ["NETCDF3_CLASSIC", "NETCDF3_64BIT"]:
    +            for format in self.netcdf3_formats:
                     with create_tmp_file() as tmp_file:
                         data.to_netcdf(tmp_file, format=format, engine=write_engine)
                         for read_engine in valid_engines:
    
    From 484d1ce5ff8969b6ca6fa942b344379725f33b9c Mon Sep 17 00:00:00 2001
    From: =?UTF-8?q?Aur=C3=A9lien=20Ponte?= 
    Date: Wed, 20 May 2020 20:55:32 +0200
    Subject: [PATCH 27/71] improve to_zarr doc about chunking (#4048)
    MIME-Version: 1.0
    Content-Type: text/plain; charset=UTF-8
    Content-Transfer-Encoding: 8bit
    
    * Update dataset.py
    
    * attempt at improving the doc formulation
    
    * update to_zarr docstring
    
    * minor style update
    
    * seems to fix doc compilation locally
    
    * delete saved_on_disk.nc
    
    Co-authored-by: Aurélien Ponte 
    ---
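Notes (illustrative, not part of the diff below): the chunk-selection order described
in the new Notes section, as it could be exercised from user code. The .zarr paths are
placeholders, and dask plus zarr are assumed to be installed.

import numpy as np
import xarray as xr

ds = xr.Dataset({"foo": (("x", "y"), np.random.rand(100, 100))})

# chunks found in the ``encoding`` argument win
ds.to_zarr("example_encoding.zarr", mode="w",
           encoding={"foo": {"chunks": (25, 100)}})

# a dask-backed variable is written with its dask chunks
ds.chunk({"x": 10}).to_zarr("example_dask.zarr", mode="w")

# with neither, zarr's own heuristics choose the chunk sizes
ds.to_zarr("example_auto.zarr", mode="w")
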
     xarray/core/dataset.py | 9 +++++++++
     1 file changed, 9 insertions(+)
    
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 3a55f3eca27..d93f4044a6d 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -1604,6 +1604,15 @@ def to_zarr(
             References
             ----------
             https://zarr.readthedocs.io/
    +
    +        Notes
    +        -----
    +        Zarr chunking behavior:
    +            If chunks are found in the encoding argument or attribute
    +            corresponding to any DataArray, those chunks are used.
    +            If a DataArray is a dask array, it is written with those chunks.
+            If no other chunks are found, Zarr uses its own heuristics to
    +            choose automatic chunk sizes.
             """
             if encoding is None:
                 encoding = {}
    
    From 19b088636eb7d3f65ab7a1046ac672e0689371d8 Mon Sep 17 00:00:00 2001
    From: Prajjwal Nijhara 
    Date: Sat, 23 May 2020 23:00:50 +0530
    Subject: [PATCH 28/71] fix dangerous default arguments (#4006)
    
    Co-authored-by: Keewis 
    ---
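Notes (illustrative, not part of the diff below): the pitfall these changes guard
against, in plain Python with made-up helper names. A mutable default argument is
created once and shared across calls; the patch switches to a sentinel/None default
and builds the object inside the function body.

def append_shared(value, seq=[]):      # default list created once, shared across calls
    seq.append(value)
    return seq

append_shared(1)   # [1]
append_shared(2)   # [1, 2]  state leaks between unrelated calls

def append_fresh(value, seq=None):     # the pattern applied in this patch
    if seq is None:
        seq = []
    seq.append(value)
    return seq

append_fresh(1)    # [1]
append_fresh(2)    # [2]
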
     xarray/tests/test_backends.py    | 15 +++++++++++++--
     xarray/tests/test_conventions.py |  6 +++++-
     2 files changed, 18 insertions(+), 3 deletions(-)
    
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 280b8bff63d..49a39474b54 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -87,6 +87,7 @@
         dask_version = "10.0"
     
     ON_WINDOWS = sys.platform == "win32"
    +default_value = object()
     
     
     def open_example_dataset(name, *args, **kwargs):
    @@ -3630,11 +3631,21 @@ def create_tmp_geotiff(
         ny=3,
         nz=3,
         transform=None,
    -    transform_args=[5000, 80000, 1000, 2000.0],
    -    crs={"units": "m", "no_defs": True, "ellps": "WGS84", "proj": "utm", "zone": 18},
    +    transform_args=default_value,
    +    crs=default_value,
         open_kwargs=None,
         additional_attrs=None,
     ):
    +    if transform_args is default_value:
    +        transform_args = [5000, 80000, 1000, 2000.0]
    +    if crs is default_value:
    +        crs = {
    +            "units": "m",
    +            "no_defs": True,
    +            "ellps": "WGS84",
    +            "proj": "utm",
    +            "zone": 18,
    +        }
         # yields a temporary geotiff file and a corresponding expected DataArray
         import rasterio
         from rasterio.transform import from_origin
    diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
    index dfd20a303ff..a5f4324d182 100644
    --- a/xarray/tests/test_conventions.py
    +++ b/xarray/tests/test_conventions.py
    @@ -363,8 +363,12 @@ def create_store(self):
     
         @contextlib.contextmanager
         def roundtrip(
    -        self, data, save_kwargs={}, open_kwargs={}, allow_cleanup_failure=False
    +        self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False
         ):
    +        if save_kwargs is None:
    +            save_kwargs = {}
    +        if open_kwargs is None:
    +            open_kwargs = {}
             store = CFEncodedInMemoryStore()
             data.dump_to_store(store, **save_kwargs)
             yield open_dataset(store, **open_kwargs)
    
    From f3ffab7ee4593c97e2ae63f22140d0a823a64b6d Mon Sep 17 00:00:00 2001
    From: Mathias Hauser 
    Date: Sat, 23 May 2020 23:06:18 +0200
    Subject: [PATCH 29/71] Fix bool weights (#4075)
    
    * add tests
    
    * weights: bool -> int
    
    * whats new
    
    * Apply suggestions from code review
    
    * avoid unecessary copy
    
    Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
    ---
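Notes (illustrative, not part of the diff below): the underlying issue in two lines,
following the GH4074 comment added to weighted.py.

import xarray as xr

mask = xr.DataArray([True, True])
weights = xr.DataArray([True, True])

xr.dot(mask, weights)               # -> True, not the expected 2 (GH4074)
xr.dot(mask, weights.astype(int))   # -> 2, which is why bool weights are cast to int
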
     doc/whats-new.rst             |  4 +++-
     xarray/core/weighted.py       |  9 ++++++++-
     xarray/tests/test_weighted.py | 23 +++++++++++++++++++++++
     3 files changed, 34 insertions(+), 2 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 447aaf5b0bf..a4602c1edad 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -119,6 +119,8 @@ Bug fixes
     - Fix bug in time parsing failing to fall back to cftime. This was causing time
       variables with a time unit of `'msecs'` to fail to parse. (:pull:`3998`)
       By `Ryan May `_.
    +- Fix weighted mean when passing boolean weights (:issue:`4074`).
    +  By `Mathias Hauser `_.
     - Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`)
       By `Benoit Bovy `_.
     
    @@ -186,7 +188,7 @@ New Features
     
     - Weighted array reductions are now supported via the new :py:meth:`DataArray.weighted`
       and :py:meth:`Dataset.weighted` methods. See :ref:`comput.weighted`. (:issue:`422`, :pull:`2922`).
    -  By `Mathias Hauser `_
    +  By `Mathias Hauser `_.
     - The new jupyter notebook repr (``Dataset._repr_html_`` and
       ``DataArray._repr_html_``) (introduced in 0.14.1) is now on by default. To
       disable, use ``xarray.set_options(display_style="text")``.
    diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py
    index 996d2e4c43e..21ed06ea85f 100644
    --- a/xarray/core/weighted.py
    +++ b/xarray/core/weighted.py
    @@ -142,7 +142,14 @@ def _sum_of_weights(
             # we need to mask data values that are nan; else the weights are wrong
             mask = da.notnull()
     
    -        sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False)
    +        # bool -> int, because ``xr.dot([True, True], [True, True])`` -> True
    +        # (and not 2); GH4074
    +        if self.weights.dtype == bool:
    +            sum_of_weights = self._reduce(
    +                mask, self.weights.astype(int), dim=dim, skipna=False
    +            )
    +        else:
    +            sum_of_weights = self._reduce(mask, self.weights, dim=dim, skipna=False)
     
             # 0-weights are not valid
             valid_weights = sum_of_weights != 0.0
    diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py
    index 24531215dfb..1bf685cc95d 100644
    --- a/xarray/tests/test_weighted.py
    +++ b/xarray/tests/test_weighted.py
    @@ -59,6 +59,18 @@ def test_weighted_sum_of_weights_nan(weights, expected):
         assert_equal(expected, result)
     
     
    +def test_weighted_sum_of_weights_bool():
    +    # https://github.com/pydata/xarray/issues/4074
    +
    +    da = DataArray([1, 2])
    +    weights = DataArray([True, True])
    +    result = da.weighted(weights).sum_of_weights()
    +
    +    expected = DataArray(2)
    +
    +    assert_equal(expected, result)
    +
    +
     @pytest.mark.parametrize("da", ([1.0, 2], [1, np.nan], [np.nan, np.nan]))
     @pytest.mark.parametrize("factor", [0, 1, 3.14])
     @pytest.mark.parametrize("skipna", (True, False))
    @@ -158,6 +170,17 @@ def test_weighted_mean_nan(weights, expected, skipna):
         assert_equal(expected, result)
     
     
    +def test_weighted_mean_bool():
    +    # https://github.com/pydata/xarray/issues/4074
    +    da = DataArray([1, 1])
    +    weights = DataArray([True, True])
    +    expected = DataArray(1)
    +
    +    result = da.weighted(weights).mean()
    +
    +    assert_equal(expected, result)
    +
    +
     def expected_weighted(da, weights, dim, skipna, operation):
         """
         Generate expected result using ``*`` and ``sum``. This is checked against
    
    From bdb1d331ac685fbc1371a3b98a795545e1682e7e Mon Sep 17 00:00:00 2001
    From: Mathias Hauser 
    Date: Mon, 25 May 2020 18:32:14 +0200
    Subject: [PATCH 30/71] allow multiindex levels in plots (#3938)
    
    * allow multiindex levels in plots
    
    * query label for test
    
    * 2D plts adapt err msg
    
    * 1D plts adapt err msg
    
    * add errmsg x==y
    
    * WIP _assert_xy_valid
    
    * _assert_valid_xy
    
    * add 1D example
    
    * update docs
    
    * simplify error msg
    
    * remove '
    
    * Apply suggestions from code review
    ---
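Notes (illustrative, not part of the diff below): a small usage sketch of what this
change enables, mirroring the 1D test added in test_plot.py; assumes matplotlib is
available.

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.arange(5),
    dims="x",
    coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))),
)
da = da.set_index(x=["a", "b"])  # "a" and "b" are now MultiIndex levels

da.plot(x="a")  # previously rejected, now a level can serve as the x-axis
da.plot(y="b")  # ...or the y-axis
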
     doc/plotting.rst          | 40 +++++++++++++++++++-
     doc/whats-new.rst         |  2 +
     xarray/plot/plot.py       | 19 +++++-----
     xarray/plot/utils.py      | 39 ++++++++++++++++----
     xarray/tests/test_plot.py | 77 ++++++++++++++++++++++++++++++++-------
     5 files changed, 145 insertions(+), 32 deletions(-)
    
    diff --git a/doc/plotting.rst b/doc/plotting.rst
    index 40c0ca1a496..f98f47f2567 100644
    --- a/doc/plotting.rst
    +++ b/doc/plotting.rst
    @@ -13,7 +13,7 @@ labels can also be used to easily create informative plots.
     xarray's plotting capabilities are centered around
     :py:class:`DataArray` objects.
     To plot :py:class:`Dataset` objects
    -simply access the relevant DataArrays, ie ``dset['var1']``.
    +simply access the relevant DataArrays, i.e. ``dset['var1']``.
     Dataset specific plotting routines are also available (see :ref:`plot-dataset`).
     Here we focus mostly on arrays 2d or larger. If your data fits
     nicely into a pandas DataFrame then you're better off using one of the more
    @@ -209,6 +209,44 @@ entire figure (as for matplotlib's ``figsize`` argument).
     
     .. _plotting.multiplelines:
     
    +=========================
    + Determine x-axis values
    +=========================
    +
+By default, dimension coordinates are used for the x-axis (here the time coordinates).
    +However, you can also use non-dimension coordinates, MultiIndex levels, and dimensions
    +without coordinates along the x-axis. To illustrate this, let's calculate a 'decimal day' (epoch)
    +from the time and assign it as a non-dimension coordinate:
    +
    +.. ipython:: python
    +
+    decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta('1d')
    +    air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day))
    +    air1d_multi
    +
    +To use ``'decimal_day'`` as x coordinate it must be explicitly specified:
    +
    +.. ipython:: python
    +
    +    air1d_multi.plot(x="decimal_day")
    +
    +Creating a new MultiIndex named ``'date'`` from ``'time'`` and ``'decimal_day'``,
    +it is also possible to use a MultiIndex level as x-axis:
    +
    +.. ipython:: python
    +
    +    air1d_multi = air1d_multi.set_index(date=("time", "decimal_day"))
    +    air1d_multi.plot(x="decimal_day")
    +
+Finally, if a dataset does not have any coordinates, it enumerates all data points:
    +
    +.. ipython:: python
    +
    +    air1d_multi = air1d_multi.drop("date")
    +    air1d_multi.plot()
    +
    +The same applies to 2D plots below.
    +
     ====================================================
      Multiple lines showing variation along a dimension
     ====================================================
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index a4602c1edad..0be988da690 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -63,6 +63,8 @@ New Features
       By `Stephan Hoyer `_.
     - Allow plotting of boolean arrays. (:pull:`3766`)
       By `Marek Jacob `_
+- Enable using MultiIndex levels as coordinates in 1D and 2D plots (:issue:`3927`).
    +  By `Mathias Hauser `_.
     - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to
       the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which
       returns the days in the month each datetime in the index.  Now days in month
    diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
    index 4d6033bf00d..19a3f1e63e3 100644
    --- a/xarray/plot/plot.py
    +++ b/xarray/plot/plot.py
    @@ -14,6 +14,7 @@
     from .facetgrid import _easy_facetgrid
     from .utils import (
         _add_colorbar,
    +    _assert_valid_xy,
         _ensure_plottable,
         _infer_interval_breaks,
         _infer_xy_labels,
    @@ -29,19 +30,17 @@
     
     
     def _infer_line_data(darray, x, y, hue):
    -    error_msg = "must be either None or one of ({:s})".format(
    -        ", ".join(repr(dd) for dd in darray.dims)
    -    )
    +
         ndims = len(darray.dims)
     
    -    if x is not None and x not in darray.dims and x not in darray.coords:
    -        raise ValueError("x " + error_msg)
    +    if x is not None and y is not None:
    +        raise ValueError("Cannot specify both x and y kwargs for line plots.")
     
    -    if y is not None and y not in darray.dims and y not in darray.coords:
    -        raise ValueError("y " + error_msg)
    +    if x is not None:
    +        _assert_valid_xy(darray, x, "x")
     
    -    if x is not None and y is not None:
    -        raise ValueError("You cannot specify both x and y kwargs" "for line plots.")
    +    if y is not None:
    +        _assert_valid_xy(darray, y, "y")
     
         if ndims == 1:
             huename = None
    @@ -252,7 +251,7 @@ def line(
             Dimension or coordinate for which you want multiple lines plotted.
             If plotting against a 2D coordinate, ``hue`` must be a dimension.
         x, y : string, optional
    -        Dimensions or coordinates for x, y axis.
    +        Dimension, coordinate or MultiIndex level for x, y axis.
             Only one of these may be specified.
             The other coordinate plots values from the DataArray on which this
             plot method is called.
    diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py
    index cb993c192d9..e5c1fa89333 100644
    --- a/xarray/plot/utils.py
    +++ b/xarray/plot/utils.py
    @@ -360,7 +360,9 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None):
     
         darray must be a 2 dimensional data array, or 3d for imshow only.
         """
    -    assert x is None or x != y
    +    if (x is not None) and (x == y):
    +        raise ValueError("x and y cannot be equal.")
    +
         if imshow and darray.ndim == 3:
             return _infer_xy_labels_3d(darray, x, y, rgb)
     
    @@ -369,18 +371,41 @@ def _infer_xy_labels(darray, x, y, imshow=False, rgb=None):
                 raise ValueError("DataArray must be 2d")
             y, x = darray.dims
         elif x is None:
    -        if y not in darray.dims and y not in darray.coords:
    -            raise ValueError("y must be a dimension name if x is not supplied")
    +        _assert_valid_xy(darray, y, "y")
             x = darray.dims[0] if y == darray.dims[1] else darray.dims[1]
         elif y is None:
    -        if x not in darray.dims and x not in darray.coords:
    -            raise ValueError("x must be a dimension name if y is not supplied")
    +        _assert_valid_xy(darray, x, "x")
             y = darray.dims[0] if x == darray.dims[1] else darray.dims[1]
    -    elif any(k not in darray.coords and k not in darray.dims for k in (x, y)):
    -        raise ValueError("x and y must be coordinate variables")
    +    else:
    +        _assert_valid_xy(darray, x, "x")
    +        _assert_valid_xy(darray, y, "y")
    +
    +        if (
    +            all(k in darray._level_coords for k in (x, y))
    +            and darray._level_coords[x] == darray._level_coords[y]
    +        ):
    +            raise ValueError("x and y cannot be levels of the same MultiIndex")
    +
         return x, y
     
     
    +def _assert_valid_xy(darray, xy, name):
    +    """
    +    make sure x and y passed to plotting functions are valid
    +    """
    +
    +    # MultiIndex cannot be plotted; no point in allowing them here
    +    multiindex = set([darray._level_coords[lc] for lc in darray._level_coords])
    +
    +    valid_xy = (
    +        set(darray.dims) | set(darray.coords) | set(darray._level_coords)
    +    ) - multiindex
    +
    +    if xy not in valid_xy:
    +        valid_xy_str = "', '".join(sorted(valid_xy))
    +        raise ValueError(f"{name} must be one of None, '{valid_xy_str}'")
    +
    +
     def get_axis(figsize, size, aspect, ax):
         import matplotlib as mpl
         import matplotlib.pyplot as plt
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index af7c686bf60..6497987e813 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -136,7 +136,7 @@ def test_label_from_attrs(self):
         def test1d(self):
             self.darray[:, 0, 0].plot()
     
    -        with raises_regex(ValueError, "None"):
    +        with raises_regex(ValueError, "x must be one of None, 'dim_0'"):
                 self.darray[:, 0, 0].plot(x="dim_1")
     
             with raises_regex(TypeError, "complex128"):
    @@ -155,14 +155,31 @@ def test_1d_x_y_kw(self):
             for aa, (x, y) in enumerate(xy):
                 da.plot(x=x, y=y, ax=ax.flat[aa])
     
    -        with raises_regex(ValueError, "cannot"):
    +        with raises_regex(ValueError, "Cannot specify both"):
                 da.plot(x="z", y="z")
     
    -        with raises_regex(ValueError, "None"):
    -            da.plot(x="f", y="z")
    +        error_msg = "must be one of None, 'z'"
    +        with raises_regex(ValueError, f"x {error_msg}"):
    +            da.plot(x="f")
     
    -        with raises_regex(ValueError, "None"):
    -            da.plot(x="z", y="f")
    +        with raises_regex(ValueError, f"y {error_msg}"):
    +            da.plot(y="f")
    +
    +    def test_multiindex_level_as_coord(self):
    +        da = xr.DataArray(
    +            np.arange(5),
    +            dims="x",
    +            coords=dict(a=("x", np.arange(5)), b=("x", np.arange(5, 10))),
    +        )
    +        da = da.set_index(x=["a", "b"])
    +
    +        for x in ["a", "b"]:
    +            h = da.plot(x=x)[0]
    +            assert_array_equal(h.get_xdata(), da[x].values)
    +
    +        for y in ["a", "b"]:
    +            h = da.plot(y=y)[0]
    +            assert_array_equal(h.get_ydata(), da[y].values)
     
         # Test for bug in GH issue #2725
         def test_infer_line_data(self):
    @@ -211,7 +228,7 @@ def test_2d_line(self):
             self.darray[:, :, 0].plot.line(x="dim_0", hue="dim_1")
             self.darray[:, :, 0].plot.line(y="dim_0", hue="dim_1")
     
    -        with raises_regex(ValueError, "cannot"):
    +        with raises_regex(ValueError, "Cannot"):
                 self.darray[:, :, 0].plot.line(x="dim_1", y="dim_0", hue="dim_1")
     
         def test_2d_line_accepts_legend_kw(self):
    @@ -1032,6 +1049,16 @@ def test_nonnumeric_index_raises_typeerror(self):
             with raises_regex(TypeError, r"[Pp]lot"):
                 self.plotfunc(a)
     
    +    def test_multiindex_raises_typeerror(self):
    +        a = DataArray(
    +            easy_array((3, 2)),
    +            dims=("x", "y"),
    +            coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])),
    +        )
    +        a = a.set_index(y=("a", "b"))
    +        with raises_regex(TypeError, r"[Pp]lot"):
    +            self.plotfunc(a)
    +
         def test_can_pass_in_axis(self):
             self.pass_in_axis(self.plotmethod)
     
    @@ -1140,15 +1167,16 @@ def test_positional_coord_string(self):
             assert "y_long_name [y_units]" == ax.get_ylabel()
     
         def test_bad_x_string_exception(self):
    -        with raises_regex(ValueError, "x and y must be coordinate variables"):
    +
    +        with raises_regex(ValueError, "x and y cannot be equal."):
    +            self.plotmethod(x="y", y="y")
    +
    +        error_msg = "must be one of None, 'x', 'x2d', 'y', 'y2d'"
    +        with raises_regex(ValueError, f"x {error_msg}"):
                 self.plotmethod("not_a_real_dim", "y")
    -        with raises_regex(
    -            ValueError, "x must be a dimension name if y is not supplied"
    -        ):
    +        with raises_regex(ValueError, f"x {error_msg}"):
                 self.plotmethod(x="not_a_real_dim")
    -        with raises_regex(
    -            ValueError, "y must be a dimension name if x is not supplied"
    -        ):
    +        with raises_regex(ValueError, f"y {error_msg}"):
                 self.plotmethod(y="not_a_real_dim")
             self.darray.coords["z"] = 100
     
    @@ -1183,6 +1211,27 @@ def test_non_linked_coords_transpose(self):
             # simply ensure that these high coords were passed over
             assert np.min(ax.get_xlim()) > 100.0
     
    +    def test_multiindex_level_as_coord(self):
    +        da = DataArray(
    +            easy_array((3, 2)),
    +            dims=("x", "y"),
    +            coords=dict(x=("x", [0, 1, 2]), a=("y", [0, 1]), b=("y", [2, 3])),
    +        )
    +        da = da.set_index(y=["a", "b"])
    +
    +        for x, y in (("a", "x"), ("b", "x"), ("x", "a"), ("x", "b")):
    +            self.plotfunc(da, x=x, y=y)
    +
    +            ax = plt.gca()
    +            assert x == ax.get_xlabel()
    +            assert y == ax.get_ylabel()
    +
    +        with raises_regex(ValueError, "levels of the same MultiIndex"):
    +            self.plotfunc(da, x="a", y="b")
    +
    +        with raises_regex(ValueError, "y must be one of None, 'a', 'b', 'x'"):
    +            self.plotfunc(da, x="a", y="y")
    +
         def test_default_title(self):
             a = DataArray(easy_array((4, 3, 2)), dims=["a", "b", "c"])
             a.coords["c"] = [0, 1]
    
    From 3194b3ed1e414729ba6ab6f7f3ed39a425da42b1 Mon Sep 17 00:00:00 2001
    From: Andrew Williams <56925856+AndrewWilliams3142@users.noreply.github.com>
    Date: Mon, 25 May 2020 17:55:33 +0100
    Subject: [PATCH 31/71] xr.cov() and xr.corr() (#4089)
    
    * Added chunks='auto' option in dataset.py
    
    * reverted accidental changes in dataset.chunk()
    
    * Added corr and cov to computation.py. Taken from r-beer:xarray/corr
    
    * Added r-beer's tests to test_computation.py
    
    Still issues I think
    
    * trying to fix github.com/pydata/xarray/pull/3550#discussion_r349935731
    
    * Removing drop=True from the `.where()` calls in `computation.py`+test.py
    
    * api.rst and whats-new.rst
    
    * Updated `xarray/__init__.py` and added `broadcast` import to computation
    
    * added DataArray import to corr, cov
    
    * assert_allclose added to test_computation.py
    
    * removed whitespace in test_dask...oops
    
    * Added to init
    
    * format changes
    
    * Fiddling around with cov/corr tests in `test_computation.py`
    
    * PEP8 changes
    
    * pep
    
    * remove old todo and comments
    
    * isort
    
    * Added consistency check between corr() and cov(), ensure they give same
    
    * added `skipna=False` to `computation.py`. made consistency+autocov tests
    
    * formatting
    
    * Added numpy-based tests.
    
    * format
    
    * formatting again
    
    * Update doc/whats-new.rst
    
    Co-authored-by: keewis 
    
    * refactored corr/cov so there is one internal method for calculating both
    
    * formatting
    
    * updating docstrings and code suggestions from PR
    
    * paramterize ddof in tests
    
    * removed extraneous test arrays
    
    * formatting + adding deterministic docstring
    
    * added test for TypeError
    
    * formatting
    
    * tidying up docstring
    
    * formatting and tidying up `_cov_corr()` so that the logic is more clear
    
    * flake8 ...
    
    Co-authored-by: keewis 
    ---
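Notes (illustrative, not part of the diff below): the corr/cov consistency check
mentioned in the log above, spelled out. With ddof=0 the Pearson correlation equals
the covariance normalised by the two standard deviations; the random data and sizes
are placeholders.

import numpy as np
import xarray as xr

rng = np.random.default_rng(0)
da_a = xr.DataArray(rng.random((3, 10)), dims=("space", "time"))
da_b = xr.DataArray(rng.random((3, 10)), dims=("space", "time"))

actual = xr.corr(da_a, da_b, dim="time")
expected = xr.cov(da_a, da_b, dim="time", ddof=0) / (
    da_a.std(dim="time") * da_b.std(dim="time")
)
xr.testing.assert_allclose(actual, expected)
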
     doc/api.rst                      |   2 +
     doc/whats-new.rst                |   2 +
     xarray/__init__.py               |   4 +-
     xarray/core/computation.py       | 180 ++++++++++++++++++++++++++++++-
     xarray/tests/test_computation.py | 158 ++++++++++++++++++++++++++-
     5 files changed, 343 insertions(+), 3 deletions(-)
    
    diff --git a/doc/api.rst b/doc/api.rst
    index 8ec6843d24a..c9f24e8c3f1 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -29,6 +29,8 @@ Top-level functions
        full_like
        zeros_like
        ones_like
    +   cov
    +   corr
        dot
        polyval
        map_blocks
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 0be988da690..d4a46c1e020 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -36,6 +36,8 @@ Breaking changes
     
     New Features
     ~~~~~~~~~~~~
    +- Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
    +  By `Andrew Williams `_ and `Robin Beer `_.
     - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`)
       By `Pascal Bourgault `_.
     - Control over attributes of result in :py:func:`merge`, :py:func:`concat`,
    diff --git a/xarray/__init__.py b/xarray/__init__.py
    index 0fead57e5fb..e8274d13ffe 100644
    --- a/xarray/__init__.py
    +++ b/xarray/__init__.py
    @@ -17,7 +17,7 @@
     from .core.alignment import align, broadcast
     from .core.combine import auto_combine, combine_by_coords, combine_nested
     from .core.common import ALL_DIMS, full_like, ones_like, zeros_like
    -from .core.computation import apply_ufunc, dot, polyval, where
    +from .core.computation import apply_ufunc, corr, cov, dot, polyval, where
     from .core.concat import concat
     from .core.dataarray import DataArray
     from .core.dataset import Dataset
    @@ -54,6 +54,8 @@
         "concat",
         "decode_cf",
         "dot",
    +    "cov",
    +    "corr",
         "full_like",
         "load_dataarray",
         "load_dataset",
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 28bf818e4a3..6ac4f74c3a6 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -24,7 +24,7 @@
     import numpy as np
     
     from . import dtypes, duck_array_ops, utils
    -from .alignment import deep_align
    +from .alignment import align, deep_align
     from .merge import merge_coordinates_without_align
     from .options import OPTIONS
     from .pycompat import dask_array_type
    @@ -1069,6 +1069,184 @@ def earth_mover_distance(first_samples,
             return apply_array_ufunc(func, *args, dask=dask)
     
     
    +def cov(da_a, da_b, dim=None, ddof=1):
    +    """
    +    Compute covariance between two DataArray objects along a shared dimension.
    +
    +    Parameters
    +    ----------
    +    da_a: DataArray object
    +        Array to compute.
    +    da_b: DataArray object
    +        Array to compute.
    +    dim : str, optional
    +        The dimension along which the covariance will be computed
    +    ddof: int, optional
    +        If ddof=1, covariance is normalized by N-1, giving an unbiased estimate,
    +        else normalization is by N.
    +
    +    Returns
    +    -------
    +    covariance: DataArray
    +
    +    See also
    +    --------
    +    pandas.Series.cov: corresponding pandas function
    +    xr.corr: respective function to calculate correlation
    +
    +    Examples
    +    --------
    +    >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    +    ...                  dims=("space", "time"),
    +    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    +    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_a
    +    <xarray.DataArray (space: 3, time: 3)>
    +    array([[1. , 2. , 3. ],
    +           [0.1, 0.2, 0.3],
    +           [3.2, 0.6, 1.8]])
    +    Coordinates:
    +      * space    (space) <U2 'IA' 'IL' 'IN'
    +      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
    +
    +    >>> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    ...                  dims=("space", "time"),
    +    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    +    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_b
    +    <xarray.DataArray (space: 3, time: 3)>
    +    array([[ 0.2,  0.4,  0.6],
    +           [15. , 10. ,  5. ],
    +           [ 3.2,  0.6,  1.8]])
    +    Coordinates:
    +      * space    (space) <U2 'IA' 'IL' 'IN'
    +      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
    +
    +    >>> xr.cov(da_a, da_b)
    +    <xarray.DataArray ()>
    +    array(-3.53055556)
    +    >>> xr.cov(da_a, da_b, dim='time')
    +    <xarray.DataArray (space: 3)>
    +    array([ 0.2, -0.5,  1.69333333])
    +    Coordinates:
    +      * space    (space) >> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    +    ...                  dims=("space", "time"),
    +    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    +    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_a
    +    <xarray.DataArray (space: 3, time: 3)>
    +    array([[1. , 2. , 3. ],
    +           [0.1, 0.2, 0.3],
    +           [3.2, 0.6, 1.8]])
    +    Coordinates:
    +      * space    (space) <U2 'IA' 'IL' 'IN'
    +      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
    +
    +    >>> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    ...                  dims=("space", "time"),
    +    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    +    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_b
    +    <xarray.DataArray (space: 3, time: 3)>
    +    array([[ 0.2,  0.4,  0.6],
    +           [15. , 10. ,  5. ],
    +           [ 3.2,  0.6,  1.8]])
    +    Coordinates:
    +      * space    (space) <U2 'IA' 'IL' 'IN'
    +      * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03
    +
    +    >>> xr.corr(da_a, da_b)
    +    <xarray.DataArray ()>
    +    array(-0.57087777)
    +    >>> xr.corr(da_a, da_b, dim='time')
    +    <xarray.DataArray (space: 3)>
    +    array([ 1., -1.,  1.])
    +    Coordinates:
    +      * space    (space) 
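    For reference, a minimal runnable sketch of the two new top-level functions added by this patch;
    the input arrays are the illustrative values used in the docstring above, and it assumes an
    xarray installation with this change applied::

        import numpy as np
        import pandas as pd
        import xarray as xr

        # two aligned DataArrays sharing the "space" and "time" dimensions
        times = pd.date_range("2000-01-01", freq="1D", periods=3)
        da_a = xr.DataArray(
            np.array([[1.0, 2.0, 3.0], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
            dims=("space", "time"),
            coords={"space": ["IA", "IL", "IN"], "time": times},
        )
        da_b = xr.DataArray(
            np.array([[0.2, 0.4, 0.6], [15.0, 10.0, 5.0], [3.2, 0.6, 1.8]]),
            dims=("space", "time"),
            coords={"space": ["IA", "IL", "IN"], "time": times},
        )

        # covariance over every shared dimension, correlation along "time" only
        print(xr.cov(da_a, da_b, ddof=1))
        print(xr.corr(da_a, da_b, dim="time"))
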
    Date: Mon, 25 May 2020 20:23:44 +0100
    Subject: [PATCH 32/71] Auto chunk (#4064)
    MIME-Version: 1.0
    Content-Type: text/plain; charset=UTF-8
    Content-Transfer-Encoding: 8bit
    
    * Added chunks='auto' option in dataset.py
    
    * FIX: correct dask array handling in _calc_idxminmax (#3922)
    
    * FIX: correct dask array handling in _calc_idxminmax
    
    * FIX: remove unneeded import, reformat via black
    
    * fix idxmax, idxmin with dask arrays
    
    * FIX: use array[dim].data in `_calc_idxminmax` as per @keewis suggestion, attach dim name to result
    
    * ADD: add dask tests to `idxmin`/`idxmax` dataarray tests
    
    * FIX: add back fixture line removed by accident
    
    * ADD: complete dask handling in `idxmin`/`idxmax` tests in test_dataarray, xfail dask tests for dtype datetime64 (M)
    
    * ADD: add "support dask handling for idxmin/idxmax" in whats-new.rst
    
    * MIN: reintroduce changes added by #3953
    
    * MIN: change if-clause to use `and` instead of `&` as per review-comment
    
    * MIN: change if-clause to use `and` instead of `&` as per review-comment
    
    * WIP: remove dask handling entirely for debugging purposes
    
    * Test for dask computes
    
    * WIP: re-add dask handling (map_blocks-approach), add `with raise_if_dask_computes()` context to idxmin-tests
    
    * Use dask indexing instead of map_blocks.
    
    * Better chunk choice.
    
    * Return -1 for _nan_argminmax_object if all NaNs along dim
    
    * Revert "Return -1 for _nan_argminmax_object if all NaNs along dim"
    
    This reverts commit 58901b9da821a04f2ec085577cb916c4d67f6f50.
    
    * Raise error for object arrays
    
    * No error for object arrays. Instead expect 1 compute in tests.
    
    Co-authored-by: dcherian 
    
    * fix the failing flake8 CI (#4057)
    
    * rename d and l to dim and length
    
    * Fixed typo in rasterio docs (#4063)
    
    * Added chunks='auto' option in dataset.py
    
    Added changes to whats-new.rst
    
    * Added chunks='auto' option in dataset.py
    
    Added changes to whats-new.rst
    
    * Error fix, catch chunks=None
    
    * Minor reformatting + flake8 changes
    
    * Added isinstance(chunks, (Number, str)) in dataset.py, passing
    
    * format changes
    
    * added auto-chunk test for dataarrays
    
    * Assert chunk sizes equal in auto-chunk test
    
    Co-authored-by: Kai Mühlbauer 
    Co-authored-by: dcherian 
    Co-authored-by: keewis 
    Co-authored-by: clausmichele <31700619+clausmichele@users.noreply.github.com>
    Co-authored-by: Keewis 
    ---
     doc/whats-new.rst         | 4 ++++
     xarray/core/dataset.py    | 9 ++++++---
     xarray/tests/test_dask.py | 8 ++++++++
     3 files changed, 18 insertions(+), 3 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index d4a46c1e020..e1012283c94 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -36,6 +36,10 @@ Breaking changes
     
     New Features
     ~~~~~~~~~~~~
    +
    +- ``chunks='auto'`` is now supported in the ``chunks`` argument of
    +  :py:meth:`Dataset.chunk`. (:issue:`4055`)
    +  By `Andrew Williams `_ 
     - Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
       By `Andrew Williams `_ and `Robin Beer `_.
     - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`)
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index d93f4044a6d..43f6ad9c90e 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -1707,7 +1707,10 @@ def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
         def chunk(
             self,
             chunks: Union[
    -            None, Number, Mapping[Hashable, Union[None, Number, Tuple[Number, ...]]]
    +            None,
    +            Number,
    +            str,
    +            Mapping[Hashable, Union[None, Number, str, Tuple[Number, ...]]],
             ] = None,
             name_prefix: str = "xarray-",
             token: str = None,
    @@ -1725,7 +1728,7 @@ def chunk(
     
             Parameters
             ----------
    -        chunks : int or mapping, optional
    +        chunks : int, 'auto' or mapping, optional
                 Chunk sizes along each dimension, e.g., ``5`` or
                 ``{'x': 5, 'y': 5}``.
             name_prefix : str, optional
    @@ -1742,7 +1745,7 @@ def chunk(
             """
             from dask.base import tokenize
     
    -        if isinstance(chunks, Number):
    +        if isinstance(chunks, (Number, str)):
                 chunks = dict.fromkeys(self.dims, chunks)
     
             if chunks is not None:
    diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
    index 75beb3757ca..6f714fe1825 100644
    --- a/xarray/tests/test_dask.py
    +++ b/xarray/tests/test_dask.py
    @@ -1035,6 +1035,14 @@ def test_unify_chunks_shallow_copy(obj, transform):
         assert_identical(obj, unified) and obj is not obj.unify_chunks()
     
     
    +@pytest.mark.parametrize("obj", [make_da()])
    +def test_auto_chunk_da(obj):
    +    actual = obj.chunk("auto").data
    +    expected = obj.data.rechunk("auto")
    +    np.testing.assert_array_equal(actual, expected)
    +    assert actual.chunks == expected.chunks
    +
    +
     def test_map_blocks_error(map_da, map_ds):
         def bad_func(darray):
             return (darray * darray.x + 5 * darray.y)[:1, :1]
    
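    A short sketch of the ``chunks='auto'`` support added above (assumes dask is installed; the
    dataset sizes are made up for illustration)::

        import numpy as np
        import xarray as xr

        ds = xr.Dataset({"a": (("x", "y"), np.zeros((1000, 1000)))})

        # let dask choose chunk sizes along every dimension
        auto = ds.chunk("auto")
        print(auto.chunks)

        # "auto" can also be mixed with explicit sizes per dimension
        mixed = ds.chunk({"x": "auto", "y": 200})
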
    From d1f7cb8fd95d588d3f7a7e90916c25747b90ad5a Mon Sep 17 00:00:00 2001
    From: Keisuke Fujii 
    Date: Tue, 26 May 2020 05:02:36 +0900
    Subject: [PATCH 33/71] Improve interp performance (#4069)
    
    * Fixes 2223
    
    * more tests
    
    * add @requires_scipy to test
    
    * fix tests
    
    * black
    
    * update whatsnew. Added a test for nearest
    ---
     doc/whats-new.rst           |  7 +++++++
     xarray/core/missing.py      | 15 ++++++++++++++-
     xarray/testing.py           |  7 +------
     xarray/tests/test_interp.py | 18 ++++++++++++++++++
     4 files changed, 40 insertions(+), 7 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index e1012283c94..59c7faa8973 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -34,6 +34,13 @@ Breaking changes
       (:pull:`3274`)
       By `Elliott Sales de Andrade `_
     
    +Enhancements
    +~~~~~~~~~~~~
    +- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` 
    +  For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially 
    +  rather than interpolating in multidimensional space. (:issue:`2223`)
    +  By `Keisuke Fujii `_.
    +
     New Features
     ~~~~~~~~~~~~
     
    diff --git a/xarray/core/missing.py b/xarray/core/missing.py
    index f973b4a5468..374eaec1fa7 100644
    --- a/xarray/core/missing.py
    +++ b/xarray/core/missing.py
    @@ -619,6 +619,19 @@ def interp(var, indexes_coords, method, **kwargs):
         # default behavior
         kwargs["bounds_error"] = kwargs.get("bounds_error", False)
     
    +    # check if the interpolation can be done in orthogonal manner
    +    if (
    +        len(indexes_coords) > 1
    +        and method in ["linear", "nearest"]
    +        and all(dest[1].ndim == 1 for dest in indexes_coords.values())
    +        and len(set([d[1].dims[0] for d in indexes_coords.values()]))
    +        == len(indexes_coords)
    +    ):
    +        # interpolate sequentially
    +        for dim, dest in indexes_coords.items():
    +            var = interp(var, {dim: dest}, method, **kwargs)
    +        return var
    +
         # target dimensions
         dims = list(indexes_coords)
         x, new_x = zip(*[indexes_coords[d] for d in dims])
    @@ -659,7 +672,7 @@ def interp_func(var, x, new_x, method, kwargs):
             New coordinates. Should not contain NaN.
         method: string
             {'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'} for
    -        1-dimensional itnterpolation.
    +        1-dimensional interpolation.
             {'linear', 'nearest'} for multidimensional interpolation
         **kwargs:
             Optional keyword arguments to be passed to scipy.interpolator
    diff --git a/xarray/testing.py b/xarray/testing.py
    index ac189f7e023..e7bf5f9221a 100644
    --- a/xarray/testing.py
    +++ b/xarray/testing.py
    @@ -10,12 +10,7 @@
     from xarray.core.indexes import default_indexes
     from xarray.core.variable import IndexVariable, Variable
     
    -__all__ = (
    -    "assert_allclose",
    -    "assert_chunks_equal",
    -    "assert_equal",
    -    "assert_identical",
    -)
    +__all__ = ("assert_allclose", "assert_chunks_equal", "assert_equal", "assert_identical")
     
     
     def _decode_string_data(data):
    diff --git a/xarray/tests/test_interp.py b/xarray/tests/test_interp.py
    index 0502348160e..7a0dda216e2 100644
    --- a/xarray/tests/test_interp.py
    +++ b/xarray/tests/test_interp.py
    @@ -699,3 +699,21 @@ def test_3641():
         times = xr.cftime_range("0001", periods=3, freq="500Y")
         da = xr.DataArray(range(3), dims=["time"], coords=[times])
         da.interp(time=["0002-05-01"])
    +
    +
    +@requires_scipy
    +@pytest.mark.parametrize("method", ["nearest", "linear"])
    +def test_decompose(method):
    +    da = xr.DataArray(
    +        np.arange(6).reshape(3, 2),
    +        dims=["x", "y"],
    +        coords={"x": [0, 1, 2], "y": [-0.1, -0.3]},
    +    )
    +    x_new = xr.DataArray([0.5, 1.5, 2.5], dims=["x1"])
    +    y_new = xr.DataArray([-0.15, -0.25], dims=["y1"])
    +    x_broadcast, y_broadcast = xr.broadcast(x_new, y_new)
    +    assert x_broadcast.ndim == 2
    +
    +    actual = da.interp(x=x_new, y=y_new, method=method).drop(("x", "y"))
    +    expected = da.interp(x=x_broadcast, y=y_broadcast, method=method).drop(("x", "y"))
    +    assert_allclose(actual, expected)
    
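    A small sketch of the access pattern this change optimizes: independent 1-D indexers along two
    dimensions, which are now interpolated sequentially instead of in one multidimensional call
    (requires scipy; the data is illustrative)::

        import numpy as np
        import xarray as xr

        da = xr.DataArray(
            np.arange(6.0).reshape(3, 2),
            dims=["x", "y"],
            coords={"x": [0, 1, 2], "y": [-0.1, -0.3]},
        )

        # orthogonal targets: the interpolation decomposes into 1-D steps along "x", then "y"
        out = da.interp(x=[0.5, 1.5], y=[-0.15, -0.25], method="linear")
        print(out)
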
    From 864877c313d026ea5664570741a328324064f77c Mon Sep 17 00:00:00 2001
    From: Andrew Williams <56925856+AndrewWilliams3142@users.noreply.github.com>
    Date: Tue, 26 May 2020 20:03:24 +0100
    Subject: [PATCH 34/71] Corrcov typo fix (#4096)
    
    * fixing typo in corr, cov docstrings
    
    * reverted accidental changes in dataset.chunk()
    
    * pep8
    ---
     xarray/core/computation.py | 4 ++--
     1 file changed, 2 insertions(+), 2 deletions(-)
    
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 6ac4f74c3a6..5e172ea29ab 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -1108,7 +1108,7 @@ def cov(da_a, da_b, dim=None, ddof=1):
         Coordinates:
           * space    (space) <U2 'IA' 'IL' 'IN'
     
    -    >>> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    >>> da_b = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
         ...                  dims=("space", "time"),
         ...                  coords=[('space', ['IA', 'IL', 'IN']),
         ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    @@ -1177,7 +1177,7 @@ def corr(da_a, da_b, dim=None):
         Coordinates:
           * space    (space) <U2 'IA' 'IL' 'IN'
     
    -    >>> da_a = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    >>> da_b = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
         ...                  dims=("space", "time"),
         ...                  coords=[('space', ['IA', 'IL', 'IN']),
         ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    
    From e5cc19cd8f8a69e0743f230f5bf51b7778a0ff96 Mon Sep 17 00:00:00 2001
    From: Deepak Cherian 
    Date: Tue, 26 May 2020 22:20:01 +0000
    Subject: [PATCH 35/71] Fix conversion of multiindexed pandas objects to sparse
     xarray objects (#4088)
    
    * Fix conversion of multiindexed pandas objects to sparse xarray objects
    
    * lint
    
    * fix whats-new
    
    * fix test
    
    * minor whats-new
    ---
     doc/whats-new.rst              |  2 ++
     xarray/core/dataset.py         |  2 +-
     xarray/tests/test_dataarray.py | 18 ++++++++++++++++++
     3 files changed, 21 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 59c7faa8973..a32e0393bcf 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -102,6 +102,8 @@ Bug fixes
     ~~~~~~~~~
     - Support dark mode in VS code (:issue:`4024`)
       By `Keisuke Fujii `_.
    +- Fix bug when converting multiindexed Pandas objects to sparse xarray objects. (:issue:`4019`)
    +  By `Deepak Cherian `_.
     - ``ValueError`` is raised when ``fill_value`` is not a scalar in :py:meth:`full_like`. (:issue:`3977`)
       By `Huite Bootsma `_.
     - Fix wrong order in converting a ``pd.Series`` with a MultiIndex to ``DataArray``. (:issue:`3951`)
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 43f6ad9c90e..2d0044711fe 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -4537,7 +4537,7 @@ def _set_sparse_data_from_dataframe(
             idx = dataframe.index
             if isinstance(idx, pd.MultiIndex):
                 coords = np.stack([np.asarray(code) for code in idx.codes], axis=0)
    -            is_sorted = idx.is_lexsorted
    +            is_sorted = idx.is_lexsorted()
                 shape = tuple(lev.size for lev in idx.levels)
             else:
                 coords = np.arange(idx.size).reshape(1, -1)
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index a01234616a4..54a77261fb4 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -3532,6 +3532,24 @@ def test_from_series_sparse(self):
             actual_sparse.data = actual_sparse.data.todense()
             assert_identical(actual_sparse, actual_dense)
     
    +    @requires_sparse
    +    def test_from_multiindex_series_sparse(self):
    +        # regression test for GH4019
    +        import sparse
    +
    +        idx = pd.MultiIndex.from_product([np.arange(3), np.arange(5)], names=["a", "b"])
    +        series = pd.Series(np.random.RandomState(0).random(len(idx)), index=idx).sample(
    +            n=5, random_state=3
    +        )
    +
    +        dense = DataArray.from_series(series, sparse=False)
    +        expected_coords = sparse.COO.from_numpy(dense.data, np.nan).coords
    +
    +        actual_sparse = xr.DataArray.from_series(series, sparse=True)
    +        actual_coords = actual_sparse.data.coords
    +
    +        np.testing.assert_equal(actual_coords, expected_coords)
    +
         def test_to_and_from_empty_series(self):
             # GH697
             expected = pd.Series([], dtype=np.float64)
    
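    A sketch of the round trip this fixes (requires the ``sparse`` package; the series values are
    illustrative)::

        import numpy as np
        import pandas as pd
        import xarray as xr

        idx = pd.MultiIndex.from_product([np.arange(3), np.arange(5)], names=["a", "b"])
        series = pd.Series(np.arange(len(idx), dtype=float), index=idx)

        # with the fix, the sparse COO coordinates match the dense layout
        da = xr.DataArray.from_series(series, sparse=True)
        print(type(da.data))                    # sparse.COO
        print(da.sel(a=1, b=2).data.todense())
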
    From a2e9804d372a2e8a0d83904544b1d8421bfa2b44 Mon Sep 17 00:00:00 2001
    From: Akio Taniguchi 
    Date: Sat, 30 May 2020 01:50:58 +0900
    Subject: [PATCH 36/71] Add xarray-custom to related projects (#4109)
    
    ---
     doc/related-projects.rst | 1 +
     1 file changed, 1 insertion(+)
    
    diff --git a/doc/related-projects.rst b/doc/related-projects.rst
    index b02c4be7338..9891f1a6bc2 100644
    --- a/doc/related-projects.rst
    +++ b/doc/related-projects.rst
    @@ -62,6 +62,7 @@ Extend xarray capabilities
     - `eofs `_: EOF analysis in Python.
     - `hypothesis-gufunc `_: Extension to hypothesis. Makes it easy to write unit tests with xarray objects as input.
     - `nxarray `_: NeXus input/output capability for xarray.
    +- `xarray-custom `_: Data classes for custom xarray creation.
     - `xarray_extras `_: Advanced algorithms for xarray objects (e.g. integrations/interpolations).
     - `xpublish `_: Publish Xarray Datasets via a Zarr compatible REST API.
     - `xrft `_: Fourier transforms for xarray data.
    
    From 73b013f22fe050ecd758b9c13fb5a06c4a8ba22e Mon Sep 17 00:00:00 2001
    From: Phillip Butcher 
    Date: Sat, 30 May 2020 06:15:55 -0700
    Subject: [PATCH 37/71] Assign default group name in groupby if name=None
     (#158) (#4098)
    
    * Assign default group name in groupby (#158)
    
    * When groupby receives a DataArray with name=None assign name='group'
    
    * Previously, when name=None, a ValueError ("`group` must have a name") was raised
    
    * Closes #158
    
    * Add test
    
    * Update whats-new.rst
    
    * black
    
    * Add assert statement to test group name was added to DataArray
    
    Co-authored-by: phillipbutcher 
    ---
     doc/whats-new.rst            |  2 ++
     xarray/core/groupby.py       |  2 +-
     xarray/tests/test_groupby.py | 12 ++++++++++++
     3 files changed, 15 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index a32e0393bcf..e06ed5be897 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -100,6 +100,8 @@ New Features
     
     Bug fixes
     ~~~~~~~~~
    +- If groupby receives a ``DataArray`` with name=None, assign a default name (:issue:`158`)
    +  By `Phil Butcher `_.
     - Support dark mode in VS code (:issue:`4024`)
       By `Keisuke Fujii `_.
     - Fix bug when converting multiindexed Pandas objects to sparse xarray objects. (:issue:`4019`)
    diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
    index 299cb8ec4fa..04c0fabae6a 100644
    --- a/xarray/core/groupby.py
    +++ b/xarray/core/groupby.py
    @@ -321,7 +321,7 @@ def __init__(
                     group = _DummyGroup(obj, group.name, group.coords)
     
             if getattr(group, "name", None) is None:
    -            raise ValueError("`group` must have a name")
    +            group.name = "group"
     
             group, obj, stacked_dim, inserted_dims = _ensure_1d(group, obj)
             (group_dim,) = group.dims
    diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
    index 866d5fb0899..aa54c8f36f1 100644
    --- a/xarray/tests/test_groupby.py
    +++ b/xarray/tests/test_groupby.py
    @@ -538,4 +538,16 @@ def test_groupby_bins_timeseries():
         assert_identical(actual, expected)
     
     
    +def test_groupby_none_group_name():
    +    # GH158
    +    # xarray should not fail if a DataArray's name attribute is None
    +
    +    data = np.arange(10) + 10
    +    da = xr.DataArray(data)  # da.name = None
    +    key = xr.DataArray(np.floor_divide(data, 2))
    +
    +    mean = da.groupby(key).mean()
    +    assert "group" in mean.dims
    +
    +
     # TODO: move other groupby tests from test_dataset and test_dataarray over here
    
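    A minimal sketch of the new behaviour (the values are illustrative)::

        import numpy as np
        import xarray as xr

        data = np.arange(10) + 10
        da = xr.DataArray(data)        # da.name is None
        key = xr.DataArray(data // 2)  # unnamed grouping array

        # previously this raised ValueError("`group` must have a name");
        # the group is now given the default name "group"
        mean = da.groupby(key).mean()
        print(mean.dims)  # "group" appears among the dimensions
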
    From fd9e620a84389170138cc014ee5a0213718beb78 Mon Sep 17 00:00:00 2001
    From: Pascal Bourgault 
    Date: Sat, 30 May 2020 14:08:26 -0400
    Subject: [PATCH 38/71] xr.infer_freq (#4033)
    
    * xr.infer_freq and related code
    
    * Formatting and comments
    
    * Rewrite _CFTimeFrequencyInferer independently of pandas
    
    * Syntax and add frequency.py file
    
    * Fix tests and month_deltas
    
    * Require cftime 1.1.0 for the test
    
    * Apply suggestions from code review
    
    Co-authored-by: Spencer Clark 
    
    * Changes following code review
    
    * Docs
    
    * Docs
    
    * Black
    
    * Fix tests for requiring cftime 1.1.0
    
    * Update whats-new
    
    * Apply suggestions from code review
    
    Co-authored-by: Spencer Clark 
    Co-authored-by: Mathias Hauser 
    
    * Add invalid input tests for better coverage
    
    * Fix link in whats-new.rst
    
    Co-authored-by: Spencer Clark 
    Co-authored-by: Mathias Hauser 
    ---
     doc/api.rst                      |   1 +
     doc/weather-climate.rst          |   9 +
     doc/whats-new.rst                |   3 +-
     xarray/__init__.py               |   2 +
     xarray/coding/cftimeindex.py     |   3 +-
     xarray/coding/frequencies.py     | 272 +++++++++++++++++++++++++++++++
     xarray/tests/test_cftimeindex.py |  70 ++++++++
     7 files changed, 358 insertions(+), 2 deletions(-)
     create mode 100644 xarray/coding/frequencies.py
    
    diff --git a/doc/api.rst b/doc/api.rst
    index c9f24e8c3f1..3f25ac1a070 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -26,6 +26,7 @@ Top-level functions
        combine_nested
        where
        set_options
    +   infer_freq
        full_like
        zeros_like
        ones_like
    diff --git a/doc/weather-climate.rst b/doc/weather-climate.rst
    index 1eb63d24630..f03dfd14c73 100644
    --- a/doc/weather-climate.rst
    +++ b/doc/weather-climate.rst
    @@ -74,6 +74,15 @@ instance, we can create the same dates and DataArray we created above using:
         dates = xr.cftime_range(start="0001", periods=24, freq="MS", calendar="noleap")
         da = xr.DataArray(np.arange(24), coords=[dates], dims=["time"], name="foo")
     
    +Mirroring pandas' method with the same name, :py:meth:`~xarray.infer_freq` allows one to
    +infer the sampling frequency of a :py:class:`~xarray.CFTimeIndex` or a 1-D
    +:py:class:`~xarray.DataArray` containing cftime objects. It also works transparently with
    +``np.datetime64[ns]`` and ``np.timedelta64[ns]`` data.
    +
    +.. ipython:: python
    +
    +    xr.infer_freq(dates)
    +
     With :py:meth:`~xarray.CFTimeIndex.strftime` we can also easily generate formatted strings from
     the datetime values of a :py:class:`~xarray.CFTimeIndex` directly or through the
     :py:meth:`~xarray.DataArray.dt` accessor for a :py:class:`~xarray.DataArray`
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index e06ed5be897..e8e30917cff 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -43,7 +43,8 @@ Enhancements
     
     New Features
     ~~~~~~~~~~~~
    -
    +- Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`).
    +  By `Pascal Bourgault `_.
     - ``chunks='auto'`` is now supported in the ``chunks`` argument of
       :py:meth:`Dataset.chunk`. (:issue:`4055`)
       By `Andrew Williams `_ 
    diff --git a/xarray/__init__.py b/xarray/__init__.py
    index e8274d13ffe..cb4824d188d 100644
    --- a/xarray/__init__.py
    +++ b/xarray/__init__.py
    @@ -13,6 +13,7 @@
     from .backends.zarr import open_zarr
     from .coding.cftime_offsets import cftime_range
     from .coding.cftimeindex import CFTimeIndex
    +from .coding.frequencies import infer_freq
     from .conventions import SerializationWarning, decode_cf
     from .core.alignment import align, broadcast
     from .core.combine import auto_combine, combine_by_coords, combine_nested
    @@ -57,6 +58,7 @@
         "cov",
         "corr",
         "full_like",
    +    "infer_freq",
         "load_dataarray",
         "load_dataset",
         "map_blocks",
    diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py
    index 6fc28d213dd..2a7eaa99edb 100644
    --- a/xarray/coding/cftimeindex.py
    +++ b/xarray/coding/cftimeindex.py
    @@ -578,7 +578,8 @@ def asi8(self):
                 [
                     _total_microseconds(exact_cftime_datetime_difference(epoch, date))
                     for date in self.values
    -            ]
    +            ],
    +            dtype=np.int64,
             )
     
         def _round_via_method(self, freq, method):
    diff --git a/xarray/coding/frequencies.py b/xarray/coding/frequencies.py
    new file mode 100644
    index 00000000000..86f84ba5fbd
    --- /dev/null
    +++ b/xarray/coding/frequencies.py
    @@ -0,0 +1,272 @@
    +"""FrequencyInferer analog for cftime.datetime objects"""
    +# The infer_freq method and the _CFTimeFrequencyInferer
    +# subclass defined here were copied and adapted for
    +# use with cftime.datetime objects based on the source code in
    +# pandas.tseries.Frequencies._FrequencyInferer
    +
    +# For reference, here is a copy of the pandas copyright notice:
    +
    +# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
    +# All rights reserved.
    +
    +# Copyright (c) 2008-2011 AQR Capital Management, LLC
    +# All rights reserved.
    +
    +# Redistribution and use in source and binary forms, with or without
    +# modification, are permitted provided that the following conditions are
    +# met:
    +
    +#     * Redistributions of source code must retain the above copyright
    +#        notice, this list of conditions and the following disclaimer.
    +
    +#     * Redistributions in binary form must reproduce the above
    +#        copyright notice, this list of conditions and the following
    +#        disclaimer in the documentation and/or other materials provided
    +#        with the distribution.
    +
    +#     * Neither the name of the copyright holder nor the names of any
    +#        contributors may be used to endorse or promote products derived
    +#        from this software without specific prior written permission.
    +
    +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
    +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    +
    +import numpy as np
    +import pandas as pd
    +
    +from ..core.common import _contains_datetime_like_objects
    +from .cftime_offsets import _MONTH_ABBREVIATIONS
    +from .cftimeindex import CFTimeIndex
    +
    +_ONE_MICRO = 1
    +_ONE_MILLI = _ONE_MICRO * 1000
    +_ONE_SECOND = _ONE_MILLI * 1000
    +_ONE_MINUTE = 60 * _ONE_SECOND
    +_ONE_HOUR = 60 * _ONE_MINUTE
    +_ONE_DAY = 24 * _ONE_HOUR
    +
    +
    +def infer_freq(index):
    +    """
    +    Infer the most likely frequency given the input index.
    +
    +    Parameters
    +    ----------
    +    index : CFTimeIndex, DataArray, pd.DatetimeIndex, pd.TimedeltaIndex, pd.Series
    +      If not passed a CFTimeIndex, this simply calls `pandas.infer_freq`.
    +      If passed a Series or a DataArray will use the values of the series (NOT THE INDEX).
    +
    +    Returns
    +    -------
    +    str or None
    +        None if no discernible frequency.
    +
    +    Raises
    +    ------
    +    TypeError
    +        If the index is not datetime-like.
    +    ValueError
    +        If there are fewer than three values or the index is not 1D.
    +    """
    +    from xarray.core.dataarray import DataArray
    +
    +    if isinstance(index, (DataArray, pd.Series)):
    +        if index.ndim != 1:
    +            raise ValueError("'index' must be 1D")
    +        elif not _contains_datetime_like_objects(DataArray(index)):
    +            raise ValueError("'index' must contain datetime-like objects")
    +        dtype = np.asarray(index).dtype
    +        if dtype == "datetime64[ns]":
    +            index = pd.DatetimeIndex(index.values)
    +        elif dtype == "timedelta64[ns]":
    +            index = pd.TimedeltaIndex(index.values)
    +        else:
    +            index = CFTimeIndex(index.values)
    +
    +    if isinstance(index, CFTimeIndex):
    +        inferer = _CFTimeFrequencyInferer(index)
    +        return inferer.get_freq()
    +
    +    return pd.infer_freq(index)
    +
    +
    +class _CFTimeFrequencyInferer:  # (pd.tseries.frequencies._FrequencyInferer):
    +    def __init__(self, index):
    +        self.index = index
    +        self.values = index.asi8
    +
    +        if len(index) < 3:
    +            raise ValueError("Need at least 3 dates to infer frequency")
    +
    +        self.is_monotonic = (
    +            self.index.is_monotonic_decreasing or self.index.is_monotonic_increasing
    +        )
    +
    +        self._deltas = None
    +        self._year_deltas = None
    +        self._month_deltas = None
    +
    +    def get_freq(self):
    +        """Find the appropriate frequency string to describe the inferred frequency of self.index
    +
    +        Adapted from `pandas.tsseries.frequencies._FrequencyInferer.get_freq` for CFTimeIndexes.
    +
    +        Returns
    +        -------
    +        str or None
    +        """
    +        if not self.is_monotonic or not self.index.is_unique:
    +            return None
    +
    +        delta = self.deltas[0]  # Smallest delta
    +        if _is_multiple(delta, _ONE_DAY):
    +            return self._infer_daily_rule()
    +        # There is no possible intraday frequency with a non-unique delta
    +        # Different from pandas: we don't need to manage DST and business offsets in cftime
    +        elif not len(self.deltas) == 1:
    +            return None
    +
    +        if _is_multiple(delta, _ONE_HOUR):
    +            return _maybe_add_count("H", delta / _ONE_HOUR)
    +        elif _is_multiple(delta, _ONE_MINUTE):
    +            return _maybe_add_count("T", delta / _ONE_MINUTE)
    +        elif _is_multiple(delta, _ONE_SECOND):
    +            return _maybe_add_count("S", delta / _ONE_SECOND)
    +        elif _is_multiple(delta, _ONE_MILLI):
    +            return _maybe_add_count("L", delta / _ONE_MILLI)
    +        else:
    +            return _maybe_add_count("U", delta / _ONE_MICRO)
    +
    +    def _infer_daily_rule(self):
    +        annual_rule = self._get_annual_rule()
    +        if annual_rule:
    +            nyears = self.year_deltas[0]
    +            month = _MONTH_ABBREVIATIONS[self.index[0].month]
    +            alias = f"{annual_rule}-{month}"
    +            return _maybe_add_count(alias, nyears)
    +
    +        quartely_rule = self._get_quartely_rule()
    +        if quartely_rule:
    +            nquarters = self.month_deltas[0] / 3
    +            mod_dict = {0: 12, 2: 11, 1: 10}
    +            month = _MONTH_ABBREVIATIONS[mod_dict[self.index[0].month % 3]]
    +            alias = f"{quartely_rule}-{month}"
    +            return _maybe_add_count(alias, nquarters)
    +
    +        monthly_rule = self._get_monthly_rule()
    +        if monthly_rule:
    +            return _maybe_add_count(monthly_rule, self.month_deltas[0])
    +
    +        if len(self.deltas) == 1:
    +            # Daily as there is no "Weekly" offsets with CFTime
    +            days = self.deltas[0] / _ONE_DAY
    +            return _maybe_add_count("D", days)
    +
    +        # CFTime has no business freq and no "week of month" (WOM)
    +        return None
    +
    +    def _get_annual_rule(self):
    +        if len(self.year_deltas) > 1:
    +            return None
    +
    +        if len(np.unique(self.index.month)) > 1:
    +            return None
    +
    +        return {"cs": "AS", "ce": "A"}.get(month_anchor_check(self.index))
    +
    +    def _get_quartely_rule(self):
    +        if len(self.month_deltas) > 1:
    +            return None
    +
    +        if not self.month_deltas[0] % 3 == 0:
    +            return None
    +
    +        return {"cs": "QS", "ce": "Q"}.get(month_anchor_check(self.index))
    +
    +    def _get_monthly_rule(self):
    +        if len(self.month_deltas) > 1:
    +            return None
    +
    +        return {"cs": "MS", "ce": "M"}.get(month_anchor_check(self.index))
    +
    +    @property
    +    def deltas(self):
    +        """Sorted unique timedeltas as microseconds."""
    +        if self._deltas is None:
    +            self._deltas = _unique_deltas(self.values)
    +        return self._deltas
    +
    +    @property
    +    def year_deltas(self):
    +        """Sorted unique year deltas."""
    +        if self._year_deltas is None:
    +            self._year_deltas = _unique_deltas(self.index.year)
    +        return self._year_deltas
    +
    +    @property
    +    def month_deltas(self):
    +        """Sorted unique month deltas."""
    +        if self._month_deltas is None:
    +            self._month_deltas = _unique_deltas(self.index.year * 12 + self.index.month)
    +        return self._month_deltas
    +
    +
    +def _unique_deltas(arr):
    +    """Sorted unique deltas of numpy array"""
    +    return np.sort(np.unique(np.diff(arr)))
    +
    +
    +def _is_multiple(us, mult: int):
    +    """Whether us is a multiple of mult"""
    +    return us % mult == 0
    +
    +
    +def _maybe_add_count(base: str, count: float):
    +    """If count is greater than 1, add it to the base offset string"""
    +    if count != 1:
    +        assert count == int(count)
    +        count = int(count)
    +        return f"{count}{base}"
    +    else:
    +        return base
    +
    +
    +def month_anchor_check(dates):
    +    """Return the monthly offset string.
    +
    +    Return "cs" if all dates are the first days of the month,
    +    "ce" if all dates are the last day of the month,
    +    None otherwise.
    +
    +    Replicated pandas._libs.tslibs.resolution.month_position_check
    +    but without business offset handling.
    +    """
    +    calendar_end = True
    +    calendar_start = True
    +
    +    for date in dates:
    +        if calendar_start:
    +            calendar_start &= date.day == 1
    +
    +        if calendar_end:
    +            cal = date.day == date.daysinmonth
    +            if calendar_end:
    +                calendar_end &= cal
    +        elif not calendar_start:
    +            break
    +
    +    if calendar_end:
    +        return "ce"
    +    elif calendar_start:
    +        return "cs"
    +    else:
    +        return None
    diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py
    index b30e32c92ad..745ae341370 100644
    --- a/xarray/tests/test_cftimeindex.py
    +++ b/xarray/tests/test_cftimeindex.py
    @@ -1046,3 +1046,73 @@ def test_asi8_distant_date():
         result = index.asi8
         expected = np.array([1000000 * 86400 * 400 * 8000 + 12345 * 1000000 + 123456])
         np.testing.assert_array_equal(result, expected)
    +
    +
    +@requires_cftime_1_1_0
    +def test_infer_freq_valid_types():
    +    cf_indx = xr.cftime_range("2000-01-01", periods=3, freq="D")
    +    assert xr.infer_freq(cf_indx) == "D"
    +    assert xr.infer_freq(xr.DataArray(cf_indx)) == "D"
    +
    +    pd_indx = pd.date_range("2000-01-01", periods=3, freq="D")
    +    assert xr.infer_freq(pd_indx) == "D"
    +    assert xr.infer_freq(xr.DataArray(pd_indx)) == "D"
    +
    +    pd_td_indx = pd.timedelta_range(start="1D", periods=3, freq="D")
    +    assert xr.infer_freq(pd_td_indx) == "D"
    +    assert xr.infer_freq(xr.DataArray(pd_td_indx)) == "D"
    +
    +
    +@requires_cftime_1_1_0
    +def test_infer_freq_invalid_inputs():
    +    # Non-datetime DataArray
    +    with pytest.raises(ValueError, match="must contain datetime-like objects"):
    +        xr.infer_freq(xr.DataArray([0, 1, 2]))
    +
    +    indx = xr.cftime_range("1990-02-03", periods=4, freq="MS")
    +    # 2D DataArray
    +    with pytest.raises(ValueError, match="must be 1D"):
    +        xr.infer_freq(xr.DataArray([indx, indx]))
    +
    +    # CFTimeIndex too short
    +    with pytest.raises(ValueError, match="Need at least 3 dates to infer frequency"):
    +        xr.infer_freq(indx[:2])
    +
    +    # Non-monotonic input
    +    assert xr.infer_freq(indx[np.array([0, 2, 1, 3])]) is None
    +
    +    # Non-unique input
    +    assert xr.infer_freq(indx[np.array([0, 1, 1, 2])]) is None
    +
    +    # No unique frequency (here 1st step is MS, second is 2MS)
    +    assert xr.infer_freq(indx[np.array([0, 1, 3])]) is None
    +
    +    # Same, but for QS
    +    indx = xr.cftime_range("1990-02-03", periods=4, freq="QS")
    +    assert xr.infer_freq(indx[np.array([0, 1, 3])]) is None
    +
    +
    +@requires_cftime_1_1_0
    +@pytest.mark.parametrize(
    +    "freq",
    +    [
    +        "300AS-JAN",
    +        "A-DEC",
    +        "AS-JUL",
    +        "2AS-FEB",
    +        "Q-NOV",
    +        "3QS-DEC",
    +        "MS",
    +        "4M",
    +        "7D",
    +        "D",
    +        "30H",
    +        "5T",
    +        "40S",
    +    ],
    +)
    +@pytest.mark.parametrize("calendar", _CFTIME_CALENDARS)
    +def test_infer_freq(freq, calendar):
    +    indx = xr.cftime_range("2000-01-01", periods=3, freq=freq, calendar=calendar)
    +    out = xr.infer_freq(indx)
    +    assert out == freq
    
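    A short usage sketch of the new function (the CFTimeIndex case assumes cftime >= 1.1.0 is
    installed; the ranges are illustrative)::

        import pandas as pd
        import xarray as xr

        # works on cftime-based indexes ...
        cf_index = xr.cftime_range("2000-01-01", periods=4, freq="6H", calendar="noleap")
        print(xr.infer_freq(cf_index))  # "6H"

        # ... and transparently on np.datetime64 data via pandas
        pd_index = pd.date_range("2000-01-01", periods=4, freq="D")
        print(xr.infer_freq(xr.DataArray(pd_index)))  # "D"
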
    From 93b2d040ff17baffd1db976acf4e6cd0c8291045 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Tue, 2 Jun 2020 01:11:08 +0200
    Subject: [PATCH 39/71] update numpy's intersphinx url (#4117)
    
    ---
     doc/conf.py | 2 +-
     1 file changed, 1 insertion(+), 1 deletion(-)
    
    diff --git a/doc/conf.py b/doc/conf.py
    index 5d304dab362..6b16468d29e 100644
    --- a/doc/conf.py
    +++ b/doc/conf.py
    @@ -351,7 +351,7 @@
         "python": ("https://docs.python.org/3/", None),
         "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None),
         "iris": ("https://scitools.org.uk/iris/docs/latest", None),
    -    "numpy": ("https://docs.scipy.org/doc/numpy", None),
    +    "numpy": ("https://numpy.org/doc/stable", None),
         "scipy": ("https://docs.scipy.org/doc/scipy/reference", None),
         "numba": ("https://numba.pydata.org/numba-doc/latest", None),
         "matplotlib": ("https://matplotlib.org", None),
    
    From 09df5ca4036d84620373fa4bccd11d1f1d4bec28 Mon Sep 17 00:00:00 2001
    From: Pascal Bourgault 
    Date: Fri, 5 Jun 2020 11:45:59 -0400
    Subject: [PATCH 40/71] Allow non-unique and non-monotonic coordinates in
     get_clean_interp_index and polyfit (#4099)
    
    * Allow non-unique and non-monotonic in get_clean_interp_index and polyfit
    
    * black on missing.py
    
    * Apply change to polyval, add pr to whats new
    
    * Add tests for get_clean_interp_index return values
    ---
     doc/whats-new.rst            |  2 +-
     xarray/core/computation.py   |  2 +-
     xarray/core/dataset.py       |  2 +-
     xarray/core/missing.py       | 17 +++++++++++------
     xarray/tests/test_missing.py | 12 ++++++++++++
     5 files changed, 26 insertions(+), 9 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index e8e30917cff..bf9347d46a2 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -50,7 +50,7 @@ New Features
       By `Andrew Williams `_ 
     - Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
       By `Andrew Williams `_ and `Robin Beer `_.
    -- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`)
    +- Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`)
       By `Pascal Bourgault `_.
     - Control over attributes of result in :py:func:`merge`, :py:func:`concat`,
       :py:func:`combine_by_coords` and :py:func:`combine_nested` using
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 5e172ea29ab..cecd4fd8e70 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -1506,7 +1506,7 @@ def polyval(coord, coeffs, degree_dim="degree"):
         from .dataarray import DataArray
         from .missing import get_clean_interp_index
     
    -    x = get_clean_interp_index(coord, coord.name)
    +    x = get_clean_interp_index(coord, coord.name, strict=False)
     
         deg_coord = coeffs[degree_dim]
     
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 2d0044711fe..d50c6e1951e 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -5839,7 +5839,7 @@ def polyfit(
             variables = {}
             skipna_da = skipna
     
    -        x = get_clean_interp_index(self, dim)
    +        x = get_clean_interp_index(self, dim, strict=False)
             xname = "{}_".format(self[dim].name)
             order = int(deg) + 1
             lhs = np.vander(x, order)
    diff --git a/xarray/core/missing.py b/xarray/core/missing.py
    index 374eaec1fa7..59d4f777c73 100644
    --- a/xarray/core/missing.py
    +++ b/xarray/core/missing.py
    @@ -208,7 +208,9 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs):
         return ds
     
     
    -def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool] = True):
    +def get_clean_interp_index(
    +    arr, dim: Hashable, use_coordinate: Union[str, bool] = True, strict: bool = True
    +):
         """Return index to use for x values in interpolation or curve fitting.
     
         Parameters
    @@ -221,6 +223,8 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool]
           If use_coordinate is True, the coordinate that shares the name of the
           dimension along which interpolation is being performed will be used as the
           x values. If False, the x values are set as an equally spaced sequence.
    +    strict : bool
    +      Whether to raise errors if the index is either non-unique or non-monotonic (default).
     
         Returns
         -------
    @@ -257,11 +261,12 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool]
         if isinstance(index, pd.MultiIndex):
             index.name = dim
     
    -    if not index.is_monotonic:
    -        raise ValueError(f"Index {index.name!r} must be monotonically increasing")
    +    if strict:
    +        if not index.is_monotonic:
    +            raise ValueError(f"Index {index.name!r} must be monotonically increasing")
     
    -    if not index.is_unique:
    -        raise ValueError(f"Index {index.name!r} has duplicate values")
    +        if not index.is_unique:
    +            raise ValueError(f"Index {index.name!r} has duplicate values")
     
         # Special case for non-standard calendar indexes
         # Numerical datetime values are defined with respect to 1970-01-01T00:00:00 in units of nanoseconds
    @@ -282,7 +287,7 @@ def get_clean_interp_index(arr, dim: Hashable, use_coordinate: Union[str, bool]
             # xarray/numpy raise a ValueError
             raise TypeError(
                 f"Index {index.name!r} must be castable to float64 to support "
    -            f"interpolation, got {type(index).__name__}."
    +            f"interpolation or curve fitting, got {type(index).__name__}."
             )
     
         return index
    diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
    index 731cd165244..bc186c8bd15 100644
    --- a/xarray/tests/test_missing.py
    +++ b/xarray/tests/test_missing.py
    @@ -534,6 +534,18 @@ def test_get_clean_interp_index_potential_overflow():
         get_clean_interp_index(da, "time")
     
     
    +@pytest.mark.parametrize("index", ([0, 2, 1], [0, 1, 1]))
    +def test_get_clean_interp_index_strict(index):
    +    da = xr.DataArray([0, 1, 2], dims=("x",), coords={"x": index})
    +
    +    with pytest.raises(ValueError):
    +        get_clean_interp_index(da, "x")
    +
    +    clean = get_clean_interp_index(da, "x", strict=False)
    +    np.testing.assert_array_equal(index, clean)
    +    assert clean.dtype == np.float64
    +
    +
     @pytest.fixture
     def da_time():
         return xr.DataArray(
    
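    A sketch of what the relaxed index check enables: fitting and evaluating a polynomial along a
    coordinate that is not sorted (the data is illustrative)::

        import numpy as np
        import xarray as xr

        x = np.array([0.0, 2.0, 1.0, 3.0])  # non-monotonic coordinate
        da = xr.DataArray(3.0 * x + 1.0, dims=("x",), coords={"x": x})

        # previously polyfit raised because the index was not monotonically increasing
        fit = da.polyfit("x", deg=1)
        print(fit.polyfit_coefficients.values)  # approximately [3., 1.]

        # polyval now also accepts the same unsorted coordinate
        recon = xr.polyval(da.x, fit.polyfit_coefficients)
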
    From 274bd4b98235557f643eb110e77ee09c3c8689bc Mon Sep 17 00:00:00 2001
    From: Dave Cole 
    Date: Sat, 6 Jun 2020 05:32:49 +1000
    Subject: [PATCH 41/71] Fix open_rasterio() for WarpedVRT with specified
     src_crs (#4104)
    
    * Test open_rasterio() support of WarpedVRT with specified src_crs
    
    * Pass additional WarpedVRT params when recreating in open_rasterio()
    
    * Add description to `whats-new.rst`
    
    * Update doc/whats-new.rst
    
    Co-authored-by: Deepak Cherian 
    ---
     doc/whats-new.rst             |  2 ++
     xarray/backends/rasterio_.py  |  7 +++++--
     xarray/tests/test_backends.py | 13 +++++++++++++
     3 files changed, 20 insertions(+), 2 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index bf9347d46a2..0c5e61addf5 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -143,6 +143,8 @@ Bug fixes
       By `Mathias Hauser `_.
     - Fix html repr in untrusted notebooks: fallback to plain text repr. (:pull:`4053`)
       By `Benoit Bovy `_.
    +- Fix :py:func:`open_rasterio` for ``WarpedVRT`` with specified ``src_crs``. (:pull:`4104`)
    +  By `Dave Cole `_.
     
     Documentation
     ~~~~~~~~~~~~~
    diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py
    index 77beffd09b1..661d5b5c6fc 100644
    --- a/xarray/backends/rasterio_.py
    +++ b/xarray/backends/rasterio_.py
    @@ -221,14 +221,17 @@ def open_rasterio(filename, parse_coordinates=None, chunks=None, cache=None, loc
             vrt = filename
             filename = vrt.src_dataset.name
             vrt_params = dict(
    +            src_crs=vrt.src_crs.to_string(),
                 crs=vrt.crs.to_string(),
                 resampling=vrt.resampling,
    +            tolerance=vrt.tolerance,
                 src_nodata=vrt.src_nodata,
                 nodata=vrt.nodata,
    -            tolerance=vrt.tolerance,
    -            transform=vrt.transform,
                 width=vrt.width,
                 height=vrt.height,
    +            src_transform=vrt.src_transform,
    +            transform=vrt.transform,
    +            dtype=vrt.working_dtype,
                 warp_extras=vrt.warp_extras,
             )
     
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 49a39474b54..3642c1eb9b7 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -4160,6 +4160,19 @@ def test_rasterio_vrt_with_transform_and_size(self):
                             assert actual_shape == expected_shape
                             assert actual_transform == expected_transform
     
    +    def test_rasterio_vrt_with_src_crs(self):
    +        # Test open_rasterio() support of WarpedVRT with specified src_crs
    +        import rasterio
    +
    +        # create geotiff with no CRS and specify it manually
    +        with create_tmp_geotiff(crs=None) as (tmp_file, expected):
    +            src_crs = rasterio.crs.CRS({"init": "epsg:32618"})
    +            with rasterio.open(tmp_file) as src:
    +                assert src.crs is None
    +                with rasterio.vrt.WarpedVRT(src, src_crs=src_crs) as vrt:
    +                    with xr.open_rasterio(vrt) as da:
    +                        assert da.crs == src_crs
    +
         @network
         def test_rasterio_vrt_network(self):
             # Make sure loading w/ rasterio give same results as xarray
    
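    A hedged sketch of the use case, mirroring the test above (assumes rasterio is installed;
    ``example.tif`` and the EPSG code are placeholders for a GeoTIFF that carries no CRS
    metadata)::

        import rasterio
        from rasterio.vrt import WarpedVRT
        import xarray as xr

        # supply the missing source CRS explicitly when the file itself has none
        src_crs = rasterio.crs.CRS.from_epsg(32618)

        with rasterio.open("example.tif") as src:
            with WarpedVRT(src, src_crs=src_crs) as vrt:
                # with this fix the extra VRT parameters (src_crs, src_transform, ...)
                # survive the round trip inside open_rasterio
                da = xr.open_rasterio(vrt)
                print(da.crs)
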
    From c07160dd2d627a021e58515cbd7753c11fb56d94 Mon Sep 17 00:00:00 2001
    From: Oriol Abril 
    Date: Fri, 5 Jun 2020 21:39:09 +0200
    Subject: [PATCH 42/71] keep attrs in reset_index (#4103)
    
    * keep attrs when resetting single index
    
    * add dataarray test
    
    * modify tests
    
    * remove rename
    
    * update what's new
    ---
     doc/whats-new.rst              | 10 ++++++----
     xarray/core/dataset.py         |  4 ++--
     xarray/tests/test_dataarray.py |  7 +++++++
     xarray/tests/test_dataset.py   |  7 +++++++
     4 files changed, 22 insertions(+), 6 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 0c5e61addf5..21eb28130c2 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -36,10 +36,12 @@ Breaking changes
     
     Enhancements
     ~~~~~~~~~~~~
    -- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp` 
    -  For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially 
    +- Performance improvement of :py:meth:`DataArray.interp` and :py:func:`Dataset.interp`
    +  For orthogonal linear- and nearest-neighbor interpolation, we do 1d-interpolation sequentially
       rather than interpolating in multidimensional space. (:issue:`2223`)
       By `Keisuke Fujii `_.
    +- :py:meth:`DataArray.reset_index` and :py:meth:`Dataset.reset_index` now keep
    +  coordinate attributes (:pull:`4103`). By `Oriol Abril `_.
     
     New Features
     ~~~~~~~~~~~~
    @@ -47,7 +49,7 @@ New Features
       By `Pascal Bourgault `_.
     - ``chunks='auto'`` is now supported in the ``chunks`` argument of
       :py:meth:`Dataset.chunk`. (:issue:`4055`)
    -  By `Andrew Williams `_ 
    +  By `Andrew Williams `_
     - Added :py:func:`xarray.cov` and :py:func:`xarray.corr` (:issue:`3784`, :pull:`3550`, :pull:`4089`).
       By `Andrew Williams `_ and `Robin Beer `_.
     - Added :py:meth:`DataArray.polyfit` and :py:func:`xarray.polyval` for fitting polynomials. (:issue:`3349`, :pull:`3733`, :pull:`4099`)
    @@ -77,7 +79,7 @@ New Features
       By `Stephan Hoyer `_.
     - Allow plotting of boolean arrays. (:pull:`3766`)
       By `Marek Jacob `_
    -- Enable using MultiIndex levels as cordinates in 1D and 2D plots (:issue:`3927`). 
    +- Enable using MultiIndex levels as cordinates in 1D and 2D plots (:issue:`3927`).
       By `Mathias Hauser `_.
     - A ``days_in_month`` accessor for :py:class:`xarray.CFTimeIndex`, analogous to
       the ``days_in_month`` accessor for a :py:class:`pandas.DatetimeIndex`, which
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index d50c6e1951e..191b57a667a 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -329,7 +329,7 @@ def split_indexes(
             else:
                 vars_to_remove.append(d)
                 if not drop:
    -                vars_to_create[str(d) + "_"] = Variable(d, index)
    +                vars_to_create[str(d) + "_"] = Variable(d, index, variables[d].attrs)
     
         for d, levs in dim_levels.items():
             index = variables[d].to_index()
    @@ -341,7 +341,7 @@ def split_indexes(
             if not drop:
                 for lev in levs:
                     idx = index.get_level_values(lev)
    -                vars_to_create[idx.name] = Variable(d, idx)
    +                vars_to_create[idx.name] = Variable(d, idx, variables[d].attrs)
     
         new_variables = dict(variables)
         for v in set(vars_to_remove):
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index 54a77261fb4..95f0ad9f612 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -1830,6 +1830,13 @@ def test_reset_index(self):
             expected = DataArray([1, 2], coords={"x_": ("x", ["a", "b"])}, dims="x")
             assert_identical(array.reset_index("x"), expected)
     
    +    def test_reset_index_keep_attrs(self):
    +        coord_1 = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True})
    +        da = DataArray([1, 0], [coord_1])
    +        expected = DataArray([1, 0], {"coord_1_": coord_1}, dims=["coord_1"])
    +        obj = da.reset_index("coord_1")
    +        assert_identical(expected, obj)
    +
         def test_reorder_levels(self):
             midx = self.mindex.reorder_levels(["level_2", "level_1"])
             expected = DataArray(self.mda.values, coords={"x": midx}, dims="x")
    diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
    index 2a89920766c..fd04c8a7f64 100644
    --- a/xarray/tests/test_dataset.py
    +++ b/xarray/tests/test_dataset.py
    @@ -2864,6 +2864,13 @@ def test_reset_index(self):
             with pytest.raises(TypeError):
                 ds.reset_index("x", inplace=True)
     
    +    def test_reset_index_keep_attrs(self):
    +        coord_1 = DataArray([1, 2], dims=["coord_1"], attrs={"attrs": True})
    +        ds = Dataset({}, {"coord_1": coord_1})
    +        expected = Dataset({}, {"coord_1_": coord_1})
    +        obj = ds.reset_index("coord_1")
    +        assert_identical(expected, obj)
    +
         def test_reorder_levels(self):
             ds = create_test_multiindex()
             mindex = ds["x"].to_index()
    
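    A small sketch of the attribute-preserving behaviour (the ``units`` attribute is illustrative)::

        import xarray as xr

        coord_1 = xr.DataArray([1, 2], dims=["coord_1"], attrs={"units": "m"})
        da = xr.DataArray([1, 0], coords={"coord_1": coord_1}, dims=["coord_1"])

        # the demoted coordinate "coord_1_" now keeps the original attrs
        reset = da.reset_index("coord_1")
        print(reset.coords["coord_1_"].attrs)  # {'units': 'm'}
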
    From 2a288f6ed4286910fcf3ab9895e1e9cbd44d30b4 Mon Sep 17 00:00:00 2001
    From: Deepak Cherian 
    Date: Sun, 7 Jun 2020 16:13:34 +0000
    Subject: [PATCH 43/71] map_blocks: Allow passing dask-backed objects in args
     (#3818)
    
    * MVP for dask collections in args
    
    * Add tests.
    
    * Use list comprehension
    
    * map_blocks: preserve attrs of dimension coordinates in input
    
    Switch to use IndexVariables instead of Indexes so that attrs are preserved.
    
    * Check that chunk sizes are compatible.
    
    * Align all xarray objects
    
    * Add some type hints.
    
    * fix rebase
    
    * move _wrapper out
    
    * Fixes
    
    * avoid index dataarrays for simplicity.
    
    need a solution to preserve index attrs
    
    * Propagate attributes for index variables.
    
    * Propagate encoding for index variables.
    
    * Fix bug with reductions when template is provided.
    
    indexes should just have indexes for output variable. When template was
    provided, I was initializing to indexes to contain all input indexes.
    It should just have the indexes from template. Otherwise indexes for
    any indexed dimensions removed by func will still be propagated.
    
    * more minimal fix.
    
    * minimize diff
    
    * Update docs.
    
* Address Joe's review comments.
    
    * docstring updates.
    
    * minor docstring change
    
    * minor.
    
    * remove useless check_shapes variable.
    
    * fix docstring
    ---
     doc/whats-new.rst         |   2 +
     xarray/core/dataarray.py  |  84 +++++++++++----
     xarray/core/dataset.py    |  83 +++++++++++----
     xarray/core/parallel.py   | 210 ++++++++++++++++++++++++--------------
     xarray/tests/test_dask.py |  59 ++++++++++-
     5 files changed, 324 insertions(+), 114 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 21eb28130c2..85e73e1b7e8 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -94,6 +94,8 @@ New Features
     - :py:meth:`map_blocks` now accepts a ``template`` kwarg. This allows use cases
       where the result of a computation could not be inferred automatically.
       By `Deepak Cherian `_
    +- :py:meth:`map_blocks` can now handle dask-backed xarray objects in ``args``. (:pull:`3818`)
    +  By `Deepak Cherian `_
     
     - Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`,
       (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`,
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index 236938bac74..3451ff14c8f 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -3262,45 +3262,91 @@ def map_blocks(
             ----------
             func: callable
                 User-provided function that accepts a DataArray as its first
    -            parameter. The function will receive a subset, i.e. one block, of this DataArray
    -            (see below), corresponding to one chunk along each chunked dimension. ``func`` will be
    -            executed as ``func(block_subset, *args, **kwargs)``.
    +            parameter. The function will receive a subset or 'block' of this DataArray (see below),
    +            corresponding to one chunk along each chunked dimension. ``func`` will be
    +            executed as ``func(subset_dataarray, *subset_args, **kwargs)``.
     
                 This function must return either a single DataArray or a single Dataset.
     
                 This function cannot add a new chunked dimension.
    +
    +        obj: DataArray, Dataset
    +            Passed to the function as its first argument, one block at a time.
             args: Sequence
    -            Passed verbatim to func after unpacking, after the sliced DataArray. xarray
    -            objects, if any, will not be split by chunks. Passing dask collections is
    -            not allowed.
    +            Passed to func after unpacking and subsetting any xarray objects by blocks.
    +            xarray objects in args must be aligned with obj, otherwise an error is raised.
             kwargs: Mapping
                 Passed verbatim to func after unpacking. xarray objects, if any, will not be
    -            split by chunks. Passing dask collections is not allowed.
    +            subset to blocks. Passing dask collections in kwargs is not allowed.
             template: (optional) DataArray, Dataset
                 xarray object representing the final result after compute is called. If not provided,
    -            the function will be first run on mocked-up data, that looks like 'obj' but
    +            the function will be first run on mocked-up data, that looks like ``obj`` but
                 has sizes 0, to determine properties of the returned object such as dtype,
    -            variable names, new dimensions and new indexes (if any).
    -            'template' must be provided if the function changes the size of existing dimensions.
    +            variable names, attributes, new dimensions and new indexes (if any).
    +            ``template`` must be provided if the function changes the size of existing dimensions.
    +            When provided, ``attrs`` on variables in `template` are copied over to the result. Any
    +            ``attrs`` set by ``func`` will be ignored.
    +
     
             Returns
             -------
    -        A single DataArray or Dataset with dask backend, reassembled from the outputs of
    -        the function.
    +        A single DataArray or Dataset with dask backend, reassembled from the outputs of the
    +        function.
     
             Notes
             -----
    -        This method is designed for when one needs to manipulate a whole xarray object
    -        within each chunk. In the more common case where one can work on numpy arrays,
    -        it is recommended to use apply_ufunc.
    +        This function is designed for when ``func`` needs to manipulate a whole xarray object
    +        subset to each block. In the more common case where ``func`` can work on numpy arrays, it is
    +        recommended to use ``apply_ufunc``.
     
    -        If none of the variables in this DataArray is backed by dask, calling this
    -        method is equivalent to calling ``func(self, *args, **kwargs)``.
    +        If none of the variables in ``obj`` is backed by dask arrays, calling this function is
    +        equivalent to calling ``func(obj, *args, **kwargs)``.
     
             See Also
             --------
    -        dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks,
    -        xarray.Dataset.map_blocks
    +        dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks,
    +        xarray.DataArray.map_blocks
    +
    +        Examples
    +        --------
    +
    +        Calculate an anomaly from climatology using ``.groupby()``. Using
    +        ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``,
    +        its indices, and its methods like ``.groupby()``.
    +
    +        >>> def calculate_anomaly(da, groupby_type="time.month"):
    +        ...     gb = da.groupby(groupby_type)
    +        ...     clim = gb.mean(dim="time")
    +        ...     return gb - clim
    +        >>> time = xr.cftime_range("1990-01", "1992-01", freq="M")
    +        >>> np.random.seed(123)
    +        >>> array = xr.DataArray(
    +        ...     np.random.rand(len(time)), dims="time", coords=[time]
    +        ... ).chunk()
    +        >>> array.map_blocks(calculate_anomaly, template=array).compute()
    +        
    +        array([ 0.12894847,  0.11323072, -0.0855964 , -0.09334032,  0.26848862,
    +                0.12382735,  0.22460641,  0.07650108, -0.07673453, -0.22865714,
    +               -0.19063865,  0.0590131 , -0.12894847, -0.11323072,  0.0855964 ,
    +                0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108,
    +                0.07673453,  0.22865714,  0.19063865, -0.0590131 ])
    +        Coordinates:
    +          * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
    +
    +        Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments
    +        to the function being applied in ``xr.map_blocks()``:
    +
    +        >>> array.map_blocks(
    +        ...     calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=array,
    +        ... )
    +        
    +        array([ 0.15361741, -0.25671244, -0.31600032,  0.008463  ,  0.1766172 ,
    +               -0.11974531,  0.43791243,  0.14197797, -0.06191987, -0.15073425,
    +               -0.19967375,  0.18619794, -0.05100474, -0.42989909, -0.09153273,
    +                0.24841842, -0.30708526, -0.31412523,  0.04197439,  0.0422506 ,
    +                0.14482397,  0.35985481,  0.23487834,  0.12144652])
    +        Coordinates:
    +            * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
             """
             from .parallel import map_blocks
     
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 191b57a667a..a8011afd3e3 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -5733,45 +5733,92 @@ def map_blocks(
             ----------
             func: callable
                 User-provided function that accepts a Dataset as its first
    -            parameter. The function will receive a subset, i.e. one block, of this Dataset
    -            (see below), corresponding to one chunk along each chunked dimension. ``func`` will be
    -            executed as ``func(block_subset, *args, **kwargs)``.
    +            parameter. The function will receive a subset or 'block' of this Dataset (see below),
    +            corresponding to one chunk along each chunked dimension. ``func`` will be
    +            executed as ``func(subset_dataset, *subset_args, **kwargs)``.
     
                 This function must return either a single DataArray or a single Dataset.
     
                 This function cannot add a new chunked dimension.
    +
    +        obj: DataArray, Dataset
    +            Passed to the function as its first argument, one block at a time.
             args: Sequence
    -            Passed verbatim to func after unpacking, after the sliced DataArray. xarray
    -            objects, if any, will not be split by chunks. Passing dask collections is
    -            not allowed.
    +            Passed to func after unpacking and subsetting any xarray objects by blocks.
    +            xarray objects in args must be aligned with obj, otherwise an error is raised.
             kwargs: Mapping
                 Passed verbatim to func after unpacking. xarray objects, if any, will not be
    -            split by chunks. Passing dask collections is not allowed.
    +            subset to blocks. Passing dask collections in kwargs is not allowed.
             template: (optional) DataArray, Dataset
                 xarray object representing the final result after compute is called. If not provided,
    -            the function will be first run on mocked-up data, that looks like 'obj' but
    +            the function will be first run on mocked-up data, that looks like ``obj`` but
                 has sizes 0, to determine properties of the returned object such as dtype,
    -            variable names, new dimensions and new indexes (if any).
    -            'template' must be provided if the function changes the size of existing dimensions.
    +            variable names, attributes, new dimensions and new indexes (if any).
    +            ``template`` must be provided if the function changes the size of existing dimensions.
    +            When provided, ``attrs`` on variables in `template` are copied over to the result. Any
    +            ``attrs`` set by ``func`` will be ignored.
    +
     
             Returns
             -------
    -        A single DataArray or Dataset with dask backend, reassembled from the outputs of
    -        the function.
    +        A single DataArray or Dataset with dask backend, reassembled from the outputs of the
    +        function.
     
             Notes
             -----
    -        This method is designed for when one needs to manipulate a whole xarray object
    -        within each chunk. In the more common case where one can work on numpy arrays,
    -        it is recommended to use apply_ufunc.
    +        This function is designed for when ``func`` needs to manipulate a whole xarray object
    +        subset to each block. In the more common case where ``func`` can work on numpy arrays, it is
    +        recommended to use ``apply_ufunc``.
     
    -        If none of the variables in this Dataset is backed by dask, calling this method
    -        is equivalent to calling ``func(self, *args, **kwargs)``.
    +        If none of the variables in ``obj`` is backed by dask arrays, calling this function is
    +        equivalent to calling ``func(obj, *args, **kwargs)``.
     
             See Also
             --------
    -        dask.array.map_blocks, xarray.apply_ufunc, xarray.map_blocks,
    +        dask.array.map_blocks, xarray.apply_ufunc, xarray.Dataset.map_blocks,
             xarray.DataArray.map_blocks
    +
    +        Examples
    +        --------
    +
    +        Calculate an anomaly from climatology using ``.groupby()``. Using
    +        ``xr.map_blocks()`` allows for parallel operations with knowledge of ``xarray``,
    +        its indices, and its methods like ``.groupby()``.
    +
    +        >>> def calculate_anomaly(da, groupby_type="time.month"):
    +        ...     gb = da.groupby(groupby_type)
    +        ...     clim = gb.mean(dim="time")
    +        ...     return gb - clim
    +        >>> time = xr.cftime_range("1990-01", "1992-01", freq="M")
    +        >>> np.random.seed(123)
    +        >>> array = xr.DataArray(
    +        ...     np.random.rand(len(time)), dims="time", coords=[time]
    +        ... ).chunk()
    +        >>> ds = xr.Dataset({"a": array})
    +        >>> ds.map_blocks(calculate_anomaly, template=ds).compute()
    +        
    +        array([ 0.12894847,  0.11323072, -0.0855964 , -0.09334032,  0.26848862,
    +                0.12382735,  0.22460641,  0.07650108, -0.07673453, -0.22865714,
    +               -0.19063865,  0.0590131 , -0.12894847, -0.11323072,  0.0855964 ,
    +                0.09334032, -0.26848862, -0.12382735, -0.22460641, -0.07650108,
    +                0.07673453,  0.22865714,  0.19063865, -0.0590131 ])
    +        Coordinates:
    +          * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
    +
    +        Note that one must explicitly use ``args=[]`` and ``kwargs={}`` to pass arguments
    +        to the function being applied in ``xr.map_blocks()``:
    +
    +        >>> ds.map_blocks(
    +        ...     calculate_anomaly, kwargs={"groupby_type": "time.year"}, template=ds,
    +        ... )
    +        
    +        array([ 0.15361741, -0.25671244, -0.31600032,  0.008463  ,  0.1766172 ,
    +               -0.11974531,  0.43791243,  0.14197797, -0.06191987, -0.15073425,
    +               -0.19967375,  0.18619794, -0.05100474, -0.42989909, -0.09153273,
    +                0.24841842, -0.30708526, -0.31412523,  0.04197439,  0.0422506 ,
    +                0.14482397,  0.35985481,  0.23487834,  0.12144652])
    +        Coordinates:
    +            * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
             """
             from .parallel import map_blocks
     
    diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
    index d91dfb4a275..522c5b36ff5 100644
    --- a/xarray/core/parallel.py
    +++ b/xarray/core/parallel.py
    @@ -16,6 +16,8 @@
         DefaultDict,
         Dict,
         Hashable,
    +    Iterable,
    +    List,
         Mapping,
         Sequence,
         Tuple,
    @@ -25,12 +27,29 @@
     
     import numpy as np
     
    +from .alignment import align
     from .dataarray import DataArray
     from .dataset import Dataset
     
     T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset)
     
     
    +def to_object_array(iterable):
    +    # using empty_like calls compute
    +    npargs = np.empty((len(iterable),), dtype=np.object)
    +    npargs[:] = iterable
    +    return npargs
    +
    +
    +def assert_chunks_compatible(a: Dataset, b: Dataset):
    +    a = a.unify_chunks()
    +    b = b.unify_chunks()
    +
    +    for dim in set(a.chunks).intersection(set(b.chunks)):
    +        if a.chunks[dim] != b.chunks[dim]:
    +            raise ValueError(f"Chunk sizes along dimension {dim!r} are not equal.")
    +
    +
     def check_result_variables(
         result: Union[DataArray, Dataset], expected: Mapping[str, Any], kind: str
     ):
    @@ -67,6 +86,17 @@ def dataset_to_dataarray(obj: Dataset) -> DataArray:
         return next(iter(obj.data_vars.values()))
     
     
    +def dataarray_to_dataset(obj: DataArray) -> Dataset:
    +    # only using _to_temp_dataset would break
    +    # func = lambda x: x.to_dataset()
    +    # since that relies on preserving name.
    +    if obj.name is None:
    +        dataset = obj._to_temp_dataset()
    +    else:
    +        dataset = obj.to_dataset()
    +    return dataset
    +
    +
     def make_meta(obj):
         """If obj is a DataArray or Dataset, return a new object of the same type and with
         the same variables and dtypes, but where all variables have size 0 and numpy
    @@ -150,30 +180,30 @@ def map_blocks(
         ----------
         func: callable
             User-provided function that accepts a DataArray or Dataset as its first
    -        parameter. The function will receive a subset of 'obj' (see below),
    +        parameter ``obj``. The function will receive a subset or 'block' of ``obj`` (see below),
             corresponding to one chunk along each chunked dimension. ``func`` will be
    -        executed as ``func(obj_subset, *args, **kwargs)``.
    +        executed as ``func(subset_obj, *subset_args, **kwargs)``.
     
             This function must return either a single DataArray or a single Dataset.
     
             This function cannot add a new chunked dimension.
     
         obj: DataArray, Dataset
    -        Passed to the function as its first argument, one dask chunk at a time.
    +        Passed to the function as its first argument, one block at a time.
         args: Sequence
    -        Passed verbatim to func after unpacking, after the sliced obj. xarray objects,
    -        if any, will not be split by chunks. Passing dask collections is not allowed.
    +        Passed to func after unpacking and subsetting any xarray objects by blocks.
    +        xarray objects in args must be aligned with obj, otherwise an error is raised.
         kwargs: Mapping
             Passed verbatim to func after unpacking. xarray objects, if any, will not be
    -        split by chunks. Passing dask collections is not allowed.
    +        subset to blocks. Passing dask collections in kwargs is not allowed.
         template: (optional) DataArray, Dataset
             xarray object representing the final result after compute is called. If not provided,
    -        the function will be first run on mocked-up data, that looks like 'obj' but
    +        the function will be first run on mocked-up data, that looks like ``obj`` but
             has sizes 0, to determine properties of the returned object such as dtype,
             variable names, attributes, new dimensions and new indexes (if any).
    -        'template' must be provided if the function changes the size of existing dimensions.
    -        When provided, `attrs` on variables in `template` are copied over to the result. Any
    -        `attrs` set by `func` will be ignored.
    +        ``template`` must be provided if the function changes the size of existing dimensions.
    +        When provided, ``attrs`` on variables in `template` are copied over to the result. Any
    +        ``attrs`` set by ``func`` will be ignored.
     
     
         Returns
    @@ -183,11 +213,11 @@ def map_blocks(
     
         Notes
         -----
    -    This function is designed for when one needs to manipulate a whole xarray object
    -    within each chunk. In the more common case where one can work on numpy arrays, it is
    -    recommended to use apply_ufunc.
    +    This function is designed for when ``func`` needs to manipulate a whole xarray object
    +    subset to each block. In the more common case where ``func`` can work on numpy arrays, it is
    +    recommended to use ``apply_ufunc``.
     
    -    If none of the variables in obj is backed by dask, calling this function is
    +    If none of the variables in ``obj`` is backed by dask arrays, calling this function is
         equivalent to calling ``func(obj, *args, **kwargs)``.
     
         See Also
    @@ -203,10 +233,6 @@ def map_blocks(
         its indices, and its methods like ``.groupby()``.
     
         >>> def calculate_anomaly(da, groupby_type="time.month"):
    -    ...     # Necessary workaround to xarray's check with zero dimensions
    -    ...     # https://github.com/pydata/xarray/issues/3575
    -    ...     if sum(da.shape) == 0:
    -    ...         return da
         ...     gb = da.groupby(groupby_type)
         ...     clim = gb.mean(dim="time")
         ...     return gb - clim
    @@ -215,7 +241,7 @@ def map_blocks(
         >>> array = xr.DataArray(
         ...     np.random.rand(len(time)), dims="time", coords=[time]
         ... ).chunk()
    -    >>> xr.map_blocks(calculate_anomaly, array).compute()
    +    >>> xr.map_blocks(calculate_anomaly, array, template=array).compute()
         
         array([ 0.12894847,  0.11323072, -0.0855964 , -0.09334032,  0.26848862,
                 0.12382735,  0.22460641,  0.07650108, -0.07673453, -0.22865714,
    @@ -229,7 +255,7 @@ def map_blocks(
         to the function being applied in ``xr.map_blocks()``:
     
         >>> xr.map_blocks(
    -    ...     calculate_anomaly, array, kwargs={"groupby_type": "time.year"},
    +    ...     calculate_anomaly, array, kwargs={"groupby_type": "time.year"}, template=array,
         ... )
         
         array([ 0.15361741, -0.25671244, -0.31600032,  0.008463  ,  0.1766172 ,
    @@ -241,14 +267,24 @@ def map_blocks(
             * time     (time) object 1990-01-31 00:00:00 ... 1991-12-31 00:00:00
         """
     
    -    def _wrapper(func, obj, to_array, args, kwargs, expected):
    -        check_shapes = dict(obj.dims)
    -        check_shapes.update(expected["shapes"])
    -
    -        if to_array:
    -            obj = dataset_to_dataarray(obj)
    -
    -        result = func(obj, *args, **kwargs)
    +    def _wrapper(
    +        func: Callable,
    +        args: List,
    +        kwargs: dict,
    +        arg_is_array: Iterable[bool],
    +        expected: dict,
    +    ):
    +        """
    +        Wrapper function that receives datasets in args; converts to dataarrays when necessary;
    +        passes these to the user function `func` and checks returned objects for expected shapes/sizes/etc.
    +        """
    +
    +        converted_args = [
    +            dataset_to_dataarray(arg) if is_array else arg
    +            for is_array, arg in zip(arg_is_array, args)
    +        ]
    +
    +        result = func(*converted_args, **kwargs)
     
             # check all dims are present
             missing_dimensions = set(expected["shapes"]) - set(result.sizes)
    @@ -259,10 +295,10 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
     
             # check that index lengths and values are as expected
             for name, index in result.indexes.items():
    -            if name in check_shapes:
    -                if len(index) != check_shapes[name]:
    +            if name in expected["shapes"]:
    +                if len(index) != expected["shapes"][name]:
                         raise ValueError(
    -                        f"Received dimension {name!r} of length {len(index)}. Expected length {check_shapes[name]}."
    +                        f"Received dimension {name!r} of length {len(index)}. Expected length {expected['shapes'][name]}."
                         )
                 if name in expected["indexes"]:
                     expected_index = expected["indexes"][name]
    @@ -289,38 +325,44 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
         elif not isinstance(kwargs, Mapping):
             raise TypeError("kwargs must be a mapping (for example, a dict)")
     
    -    for value in list(args) + list(kwargs.values()):
    +    for value in kwargs.values():
             if dask.is_dask_collection(value):
                 raise TypeError(
    -                "Cannot pass dask collections in args or kwargs yet. Please compute or "
    +                "Cannot pass dask collections in kwargs yet. Please compute or "
                     "load values before passing to map_blocks."
                 )
     
         if not dask.is_dask_collection(obj):
             return func(obj, *args, **kwargs)
     
    -    if isinstance(obj, DataArray):
    -        # only using _to_temp_dataset would break
    -        # func = lambda x: x.to_dataset()
    -        # since that relies on preserving name.
    -        if obj.name is None:
    -            dataset = obj._to_temp_dataset()
    -        else:
    -            dataset = obj.to_dataset()
    -        input_is_array = True
    -    else:
    -        dataset = obj
    -        input_is_array = False
    +    npargs = to_object_array([obj] + list(args))
    +    is_xarray = [isinstance(arg, (Dataset, DataArray)) for arg in npargs]
    +    is_array = [isinstance(arg, DataArray) for arg in npargs]
    +
    +    # all xarray objects must be aligned. This is consistent with apply_ufunc.
    +    aligned = align(*npargs[is_xarray], join="exact")
    +    # assigning to object arrays works better when RHS is object array
    +    # https://stackoverflow.com/questions/43645135/boolean-indexing-assignment-of-a-numpy-array-to-a-numpy-array
    +    npargs[is_xarray] = to_object_array(aligned)
    +    npargs[is_array] = to_object_array(
    +        [dataarray_to_dataset(da) for da in npargs[is_array]]
    +    )
    +
    +    # check that chunk sizes are compatible
    +    input_chunks = dict(npargs[0].chunks)
    +    input_indexes = dict(npargs[0].indexes)
    +    for arg in npargs[1:][is_xarray[1:]]:
    +        assert_chunks_compatible(npargs[0], arg)
    +        input_chunks.update(arg.chunks)
    +        input_indexes.update(arg.indexes)
     
    -    input_chunks = dataset.chunks
    -    dataset_indexes = set(dataset.indexes)
         if template is None:
             # infer template by providing zero-shaped arrays
    -        template = infer_template(func, obj, *args, **kwargs)
    +        template = infer_template(func, aligned[0], *args, **kwargs)
             template_indexes = set(template.indexes)
    -        preserved_indexes = template_indexes & dataset_indexes
    -        new_indexes = template_indexes - dataset_indexes
    -        indexes = {dim: dataset.indexes[dim] for dim in preserved_indexes}
    +        preserved_indexes = template_indexes & set(input_indexes)
    +        new_indexes = template_indexes - set(input_indexes)
    +        indexes = {dim: input_indexes[dim] for dim in preserved_indexes}
             indexes.update({k: template.indexes[k] for k in new_indexes})
             output_chunks = {
                 dim: input_chunks[dim] for dim in template.dims if dim in input_chunks
    @@ -328,13 +370,11 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
     
         else:
             # template xarray object has been provided with proper sizes and chunk shapes
    -        template_indexes = set(template.indexes)
    -        indexes = {dim: dataset.indexes[dim] for dim in dataset_indexes}
    -        indexes.update({k: template.indexes[k] for k in template_indexes})
    +        indexes = dict(template.indexes)
             if isinstance(template, DataArray):
                 output_chunks = dict(zip(template.dims, template.chunks))  # type: ignore
             else:
    -            output_chunks = template.chunks  # type: ignore
    +            output_chunks = dict(template.chunks)
     
         for dim in output_chunks:
             if dim in input_chunks and len(input_chunks[dim]) != len(output_chunks[dim]):
    @@ -363,7 +403,7 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
         graph: Dict[Any, Any] = {}
         new_layers: DefaultDict[str, Dict[Any, Any]] = collections.defaultdict(dict)
         gname = "{}-{}".format(
    -        dask.utils.funcname(func), dask.base.tokenize(dataset, args, kwargs)
    +        dask.utils.funcname(func), dask.base.tokenize(npargs[0], args, kwargs)
         )
     
         # map dims to list of chunk indexes
    @@ -376,9 +416,14 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
             dim: np.cumsum((0,) + chunks_v) for dim, chunks_v in output_chunks.items()
         }
     
    -    # iterate over all possible chunk combinations
    -    for v in itertools.product(*ichunk.values()):
    -        chunk_index = dict(zip(dataset.dims, v))
    +    def subset_dataset_to_block(
    +        graph: dict, gname: str, dataset: Dataset, input_chunk_bounds, chunk_index
    +    ):
    +        """
    +        Creates a task that subsets an xarray dataset to a block determined by chunk_index.
    +        Block extents are determined by input_chunk_bounds.
    +        Also subtasks that subset the constituent variables of a dataset.
    +        """
     
             # this will become [[name1, variable1],
             #                   [name2, variable2],
    @@ -387,6 +432,7 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
             data_vars = []
             coords = []
     
    +        chunk_tuple = tuple(chunk_index.values())
             for name, variable in dataset.variables.items():
                 # make a task that creates tuple of (dims, chunk)
                 if dask.is_dask_collection(variable.data):
    @@ -395,13 +441,13 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                     for dim in variable.dims:
                         chunk = chunk[chunk_index[dim]]
     
    -                chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + v
    +                chunk_variable_task = (f"{gname}-{name}-{chunk[0]}",) + chunk_tuple
                     graph[chunk_variable_task] = (
                         tuple,
                         [variable.dims, chunk, variable.attrs],
                     )
                 else:
    -                # non-dask array with possibly chunked dimensions
    +                # non-dask array possibly with dimensions chunked on other variables
                     # index into variable appropriately
                     subsetter = {
                         dim: _get_chunk_slicer(dim, chunk_index, input_chunk_bounds)
    @@ -410,7 +456,7 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                     subset = variable.isel(subsetter)
                     chunk_variable_task = (
                         "{}-{}".format(gname, dask.base.tokenize(subset)),
    -                ) + v
    +                ) + chunk_tuple
                     graph[chunk_variable_task] = (
                         tuple,
                         [subset.dims, subset, subset.attrs],
    @@ -422,7 +468,22 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                 else:
                     data_vars.append([name, chunk_variable_task])
     
    -        # expected["shapes", "coords", "data_vars", "indexes"] are used to raise nice error messages in _wrapper
    +        return (Dataset, (dict, data_vars), (dict, coords), dataset.attrs)
    +
    +    # iterate over all possible chunk combinations
    +    for chunk_tuple in itertools.product(*ichunk.values()):
    +        # mapping from dimension name to chunk index
    +        chunk_index = dict(zip(ichunk.keys(), chunk_tuple))
    +
    +        blocked_args = [
    +            subset_dataset_to_block(graph, gname, arg, input_chunk_bounds, chunk_index)
    +            if isxr
    +            else arg
    +            for isxr, arg in zip(is_xarray, npargs)
    +        ]
    +
    +        # expected["shapes", "coords", "data_vars", "indexes"] are used to
    +        # raise nice error messages in _wrapper
             expected = {}
             # input chunk 0 along a dimension maps to output chunk 0 along the same dimension
             # even if length of dimension is changed by the applied function
    @@ -436,16 +497,8 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                 for dim in indexes
             }
     
    -        from_wrapper = (gname,) + v
    -        graph[from_wrapper] = (
    -            _wrapper,
    -            func,
    -            (Dataset, (dict, data_vars), (dict, coords), dataset.attrs),
    -            input_is_array,
    -            args,
    -            kwargs,
    -            expected,
    -        )
    +        from_wrapper = (gname,) + chunk_tuple
    +        graph[from_wrapper] = (_wrapper, func, blocked_args, kwargs, is_array, expected)
     
             # mapping from variable name to dask graph key
             var_key_map: Dict[Hashable, str] = {}
    @@ -472,7 +525,11 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                 # layer.
                 new_layers[gname_l][key] = (operator.getitem, from_wrapper, name)
     
    -    hlg = HighLevelGraph.from_collections(gname, graph, dependencies=[dataset])
    +    hlg = HighLevelGraph.from_collections(
    +        gname,
    +        graph,
    +        dependencies=[arg for arg in npargs if dask.is_dask_collection(arg)],
    +    )
     
         for gname_l, layer in new_layers.items():
             # This adds in the getitems for each variable in the dataset.
    @@ -480,6 +537,10 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
             hlg.layers[gname_l] = layer
     
         result = Dataset(coords=indexes, attrs=template.attrs)
    +    for index in result.indexes:
    +        result[index].attrs = template[index].attrs
    +        result[index].encoding = template[index].encoding
    +
         for name, gname_l in var_key_map.items():
             dims = template[name].dims
             var_chunks = []
    @@ -496,6 +557,7 @@ def _wrapper(func, obj, to_array, args, kwargs, expected):
                 hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype
             )
             result[name] = (dims, data, template[name].attrs)
    +        result[name].encoding = template[name].encoding
     
         result = result.set_coords(template._coord_names)
     
    diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
    index 6f714fe1825..caeb7ad4dc8 100644
    --- a/xarray/tests/test_dask.py
    +++ b/xarray/tests/test_dask.py
    @@ -972,6 +972,7 @@ def make_da():
             coords={"x": np.arange(10), "y": np.arange(100, 120)},
             name="a",
         ).chunk({"x": 4, "y": 5})
    +    da.x.attrs["long_name"] = "x"
         da.attrs["test"] = "test"
         da.coords["c2"] = 0.5
         da.coords["ndcoord"] = da.x * 2
    @@ -995,6 +996,9 @@ def make_ds():
         map_ds.attrs["test"] = "test"
         map_ds.coords["xx"] = map_ds["a"] * map_ds.y
     
    +    map_ds.x.attrs["long_name"] = "x"
    +    map_ds.y.attrs["long_name"] = "y"
    +
         return map_ds
     
     
    @@ -1074,9 +1078,6 @@ def really_bad_func(darray):
         with raises_regex(ValueError, "inconsistent chunks"):
             xr.map_blocks(bad_func, ds_copy)
     
    -    with raises_regex(TypeError, "Cannot pass dask collections"):
    -        xr.map_blocks(bad_func, map_da, args=[map_da.chunk()])
    -
         with raises_regex(TypeError, "Cannot pass dask collections"):
             xr.map_blocks(bad_func, map_da, kwargs=dict(a=map_da.chunk()))
     
    @@ -1103,6 +1104,58 @@ def test_map_blocks_convert_args_to_list(obj):
         assert_identical(actual, expected)
     
     
    +def test_map_blocks_dask_args():
    +    da1 = xr.DataArray(
    +        np.ones((10, 20)),
    +        dims=["x", "y"],
    +        coords={"x": np.arange(10), "y": np.arange(20)},
    +    ).chunk({"x": 5, "y": 4})
    +
    +    # check that block shapes are the same
    +    def sumda(da1, da2):
    +        assert da1.shape == da2.shape
    +        return da1 + da2
    +
    +    da2 = da1 + 1
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(sumda, da1, args=[da2])
    +    xr.testing.assert_equal(da1 + da2, mapped)
    +
    +    # one dimension in common
    +    da2 = (da1 + 1).isel(x=1, drop=True)
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(operator.add, da1, args=[da2])
    +    xr.testing.assert_equal(da1 + da2, mapped)
    +
    +    # test that everything works when dimension names are different
    +    da2 = (da1 + 1).isel(x=1, drop=True).rename({"y": "k"})
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(operator.add, da1, args=[da2])
    +    xr.testing.assert_equal(da1 + da2, mapped)
    +
    +    with raises_regex(ValueError, "Chunk sizes along dimension 'x'"):
    +        xr.map_blocks(operator.add, da1, args=[da1.chunk({"x": 1})])
    +
    +    with raises_regex(ValueError, "indexes along dimension 'x' are not equal"):
    +        xr.map_blocks(operator.add, da1, args=[da1.reindex(x=np.arange(20))])
    +
    +    # reduction
    +    da1 = da1.chunk({"x": -1})
    +    da2 = da1 + 1
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(lambda a, b: (a + b).sum("x"), da1, args=[da2])
    +    xr.testing.assert_equal((da1 + da2).sum("x"), mapped)
    +
    +    # reduction with template
    +    da1 = da1.chunk({"x": -1})
    +    da2 = da1 + 1
    +    with raise_if_dask_computes():
    +        mapped = xr.map_blocks(
    +            lambda a, b: (a + b).sum("x"), da1, args=[da2], template=da1.sum("x")
    +        )
    +    xr.testing.assert_equal((da1 + da2).sum("x"), mapped)
    +
    +
     @pytest.mark.parametrize("obj", [make_da(), make_ds()])
     def test_map_blocks_add_attrs(obj):
         def add_attrs(obj):
    
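For readers skimming the patch, a minimal usage sketch of the new capability (mirroring ``test_map_blocks_dask_args`` above; ``block_sum`` is just an illustrative name):

    import numpy as np
    import xarray as xr

    da1 = xr.DataArray(
        np.ones((10, 20)),
        dims=["x", "y"],
        coords={"x": np.arange(10), "y": np.arange(20)},
    ).chunk({"x": 5, "y": 4})
    da2 = da1 + 1  # dask-backed, same chunks, so blocks line up one-to-one

    def block_sum(a, b):
        # receives one block of da1 and the matching block of da2
        return a + b

    result = xr.map_blocks(block_sum, da1, args=[da2])
    xr.testing.assert_equal(result.compute(), (da1 + da2).compute())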
    From 4071125feedee690364272e8fde9b94866f85bc7 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Thu, 11 Jun 2020 04:14:48 +0200
    Subject: [PATCH 44/71] Fix the upstream-dev pandas build failure (#4138)
    
    * use the scipy-wheels-nightly repository for numpy, scipy and pandas
    
    * update the repository url
    
    * avoid installing over the conda packages
    
    * use the repository as a package index
    
    * run the uninstall with -y
    
    * use the correct url for the scipy-wheels-nightly repository
    ---
     ci/azure/install.yml | 27 ++++++++++++++++++++++-----
     1 file changed, 22 insertions(+), 5 deletions(-)
    
    diff --git a/ci/azure/install.yml b/ci/azure/install.yml
    index 60559dd2064..eff229e863a 100644
    --- a/ci/azure/install.yml
    +++ b/ci/azure/install.yml
    @@ -12,14 +12,32 @@ steps:
     
     - bash: |
         source activate xarray-tests
    +    conda uninstall -y --force \
    +        numpy \
    +        scipy \
    +        pandas \
    +        matplotlib \
    +        dask \
    +        distributed \
    +        zarr \
    +        cftime \
    +        rasterio \
    +        pint \
    +        bottleneck
         python -m pip install \
    -        -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \
    +        -i https://pypi.anaconda.org/scipy-wheels-nightly/simple \
             --no-deps \
             --pre \
             --upgrade \
    -        matplotlib \
             numpy \
    -        scipy
    +        scipy \
    +        pandas
    +    python -m pip install \
    +        -f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com \
    +        --no-deps \
    +        --pre \
    +        --upgrade \
    +        matplotlib
         python -m pip install \
             --no-deps \
             --upgrade \
    @@ -29,8 +47,7 @@ steps:
             git+https://github.com/Unidata/cftime \
             git+https://github.com/mapbox/rasterio \
             git+https://github.com/hgrecco/pint \
    -        git+https://github.com/pydata/bottleneck \
    -        git+https://github.com/pandas-dev/pandas
    +        git+https://github.com/pydata/bottleneck
       condition: eq(variables['UPSTREAM_DEV'], 'true')
       displayName: Install upstream dev dependencies
     
    
    From 8f688ea92ae8416ecc3e18f6e060dad16960e9ac Mon Sep 17 00:00:00 2001
    From: Spencer Clark 
    Date: Thu, 11 Jun 2020 19:23:28 -0400
    Subject: [PATCH 45/71] Remove outdated note from datetime accessor docstring
     (#4148)
    
    ---
     xarray/core/accessor_dt.py | 6 ------
     1 file changed, 6 deletions(-)
    
    diff --git a/xarray/core/accessor_dt.py b/xarray/core/accessor_dt.py
    index 2977596036c..630aaee142f 100644
    --- a/xarray/core/accessor_dt.py
    +++ b/xarray/core/accessor_dt.py
    @@ -240,12 +240,6 @@ class DatetimeAccessor(Properties):
         Fields can be accessed through the `.dt` attribute
         for applicable DataArrays.
     
    -    Notes
    -    ------
    -    Note that these fields are not calendar-aware; if your datetimes are encoded
    -    with a non-Gregorian calendar (e.g. a 360-day calendar) using cftime,
    -    then some fields like `dayofyear` may not be accurate.
    -
         Examples
         ---------
         >>> import xarray as xr
    
    From 59a239710e0510f0cad28c7a521d8827a6633c36 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Fri, 12 Jun 2020 17:03:19 +0200
    Subject: [PATCH 46/71] speed up map_blocks (#4149)
    
    * replace the object array with generator expressions and zip/enumerate
    
    * remove a leftover grouping pair of parentheses
    
    * reuse is_array instead of comparing again
    ---
     xarray/core/parallel.py | 37 ++++++++++++++++++++++---------------
     1 file changed, 22 insertions(+), 15 deletions(-)
    
    diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
    index 522c5b36ff5..3a77753d0d1 100644
    --- a/xarray/core/parallel.py
    +++ b/xarray/core/parallel.py
    @@ -34,11 +34,8 @@
     T_DSorDA = TypeVar("T_DSorDA", DataArray, Dataset)
     
     
    -def to_object_array(iterable):
    -    # using empty_like calls compute
    -    npargs = np.empty((len(iterable),), dtype=np.object)
    -    npargs[:] = iterable
    -    return npargs
    +def unzip(iterable):
    +    return zip(*iterable)
     
     
     def assert_chunks_compatible(a: Dataset, b: Dataset):
    @@ -335,23 +332,33 @@ def _wrapper(
         if not dask.is_dask_collection(obj):
             return func(obj, *args, **kwargs)
     
    -    npargs = to_object_array([obj] + list(args))
    -    is_xarray = [isinstance(arg, (Dataset, DataArray)) for arg in npargs]
    -    is_array = [isinstance(arg, DataArray) for arg in npargs]
    +    all_args = [obj] + list(args)
    +    is_xarray = [isinstance(arg, (Dataset, DataArray)) for arg in all_args]
    +    is_array = [isinstance(arg, DataArray) for arg in all_args]
    +
    +    # there should be a better way to group this. partition?
    +    xarray_indices, xarray_objs = unzip(
    +        (index, arg) for index, arg in enumerate(all_args) if is_xarray[index]
    +    )
    +    others = [
    +        (index, arg) for index, arg in enumerate(all_args) if not is_xarray[index]
    +    ]
     
         # all xarray objects must be aligned. This is consistent with apply_ufunc.
    -    aligned = align(*npargs[is_xarray], join="exact")
    -    # assigning to object arrays works better when RHS is object array
    -    # https://stackoverflow.com/questions/43645135/boolean-indexing-assignment-of-a-numpy-array-to-a-numpy-array
    -    npargs[is_xarray] = to_object_array(aligned)
    -    npargs[is_array] = to_object_array(
    -        [dataarray_to_dataset(da) for da in npargs[is_array]]
    +    aligned = align(*xarray_objs, join="exact")
    +    xarray_objs = tuple(
    +        dataarray_to_dataset(arg) if is_da else arg
    +        for is_da, arg in zip(is_array, aligned)
    +    )
    +
    +    _, npargs = unzip(
    +        sorted(list(zip(xarray_indices, xarray_objs)) + others, key=lambda x: x[0])
         )
     
         # check that chunk sizes are compatible
         input_chunks = dict(npargs[0].chunks)
         input_indexes = dict(npargs[0].indexes)
    -    for arg in npargs[1:][is_xarray[1:]]:
    +    for arg in xarray_objs[1:]:
             assert_chunks_compatible(npargs[0], arg)
             input_chunks.update(arg.chunks)
             input_indexes.update(arg.indexes)
    
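The generator-based rewrite above boils down to a common pattern: tag each argument with its position, split the sequence into the group that needs processing and the rest, process the first group, then merge everything back in the original order. A standalone sketch of that pattern (the names here are illustrative, not the helpers in the patch):

    def partition_with_indices(items, predicate):
        # split into (matching, rest), remembering each item's original position
        matching = [(i, x) for i, x in enumerate(items) if predicate(x)]
        rest = [(i, x) for i, x in enumerate(items) if not predicate(x)]
        return matching, rest

    matching, rest = partition_with_indices(["a", 1, "b", 2], lambda x: isinstance(x, str))
    processed = [(i, x.upper()) for i, x in matching]
    # restore the original ordering by sorting on the remembered indices
    merged = [x for _, x in sorted(processed + rest, key=lambda pair: pair[0])]
    assert merged == ["A", 1, "B", 2]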
    From 48fbee08711bf01a4de9a822e0721608f7dd3093 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Fri, 12 Jun 2020 17:03:55 +0200
    Subject: [PATCH 47/71] parameter documentation for DataArray.sel (#4150)
    
    * copy the parameter documentation of Dataset.sel to DataArray.sel
    
    * reflow the return value documentation
    
    * update whats-new.rst
    ---
     doc/whats-new.rst        |  4 +++-
     xarray/core/dataarray.py | 52 ++++++++++++++++++++++++++++++++++++++++
     2 files changed, 55 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 85e73e1b7e8..68b2d738073 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -172,8 +172,10 @@ Documentation
       By `Matthias Riße `_.
     - Apply ``black`` to all the code in the documentation (:pull:`4012`)
       By `Justus Magin `_.
    -- Narrative documentation now describes :py:meth:`map_blocks`. :ref:`dask.automatic-parallelization`.
    +- Narrative documentation now describes :py:meth:`map_blocks`: :ref:`dask.automatic-parallelization`.
       By `Deepak Cherian `_.
    +- Add documentation for the parameters and return values of :py:meth:`DataArray.sel`.
    +  By `Justus Magin `_.
     
     Internal Changes
     ~~~~~~~~~~~~~~~~
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index 3451ff14c8f..44773e36e30 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -1076,6 +1076,19 @@ def sel(
             """Return a new DataArray whose data is given by selecting index
             labels along the specified dimension(s).
     
    +        In contrast to `DataArray.isel`, indexers for this method should use
    +        labels instead of integers.
    +
    +        Under the hood, this method is powered by using pandas's powerful Index
    +        objects. This makes label based indexing essentially just as fast as
    +        using integer indexing.
    +
    +        It also means this method uses pandas's (well documented) logic for
    +        indexing. This means you can use string shortcuts for datetime indexes
    +        (e.g., '2000-01' to select all values in January 2000). It also means
    +        that slices are treated as inclusive of both the start and stop values,
    +        unlike normal Python indexing.
    +
             .. warning::
     
               Do not try to assign values when using any of the indexing methods
    @@ -1088,6 +1101,45 @@ def sel(
               Assigning values with the chained indexing using ``.sel`` or
               ``.isel`` fails silently.
     
    +        Parameters
    +        ----------
    +        indexers : dict, optional
    +            A dict with keys matching dimensions and values given
    +            by scalars, slices or arrays of tick labels. For dimensions with
    +            multi-index, the indexer may also be a dict-like object with keys
    +            matching index level names.
    +            If DataArrays are passed as indexers, xarray-style indexing will be
    +            carried out. See :ref:`indexing` for the details.
    +            One of indexers or indexers_kwargs must be provided.
    +        method : {None, 'nearest', 'pad'/'ffill', 'backfill'/'bfill'}, optional
    +            Method to use for inexact matches:
    +
    +            * None (default): only exact matches
    +            * pad / ffill: propagate last valid index value forward
    +            * backfill / bfill: propagate next valid index value backward
    +            * nearest: use nearest valid index value
    +        tolerance : optional
    +            Maximum distance between original and new labels for inexact
    +            matches. The values of the index at the matching locations must
    +            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
    +        drop : bool, optional
    +            If ``drop=True``, drop coordinates variables in `indexers` instead
    +            of making them scalar.
    +        **indexers_kwargs : {dim: indexer, ...}, optional
    +            The keyword arguments form of ``indexers``.
    +            One of indexers or indexers_kwargs must be provided.
    +
    +        Returns
    +        -------
    +        obj : DataArray
    +            A new DataArray with the same contents as this DataArray, except the
    +            data and each dimension is indexed by the appropriate indexers.
    +            If indexer DataArrays have coordinates that do not conflict with
    +            this object, then these coordinates will be attached.
    +            In general, each array's data will be a view of the array's data
    +            in this DataArray, unless vectorized indexing was triggered by using
    +            an array indexer, in which case the data will be a copy.
    +
             See Also
             --------
             Dataset.sel
    
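A small usage sketch of the parameters documented above (the data is made up):

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(5), dims="x", coords={"x": [0.0, 0.1, 0.2, 0.3, 0.4]})
    da.sel(x=0.1)                                     # exact label match
    da.sel(x=0.12, method="nearest")                  # inexact match, nearest label
    da.sel(x=0.12, method="nearest", tolerance=0.05)  # error if no label within 0.05
    da.sel(x=slice(0.1, 0.3))                         # label slices include both endpoints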
    From e8bd8665e8fd762031c2d9c87987d21e113e41cc Mon Sep 17 00:00:00 2001
    From: Deepak Cherian 
    Date: Fri, 12 Jun 2020 15:04:18 +0000
    Subject: [PATCH 48/71] Recommend installing cftime when time decoding fails.
     (#4134)
    
    ---
     xarray/coding/times.py | 5 +++--
     1 file changed, 3 insertions(+), 2 deletions(-)
    
    diff --git a/xarray/coding/times.py b/xarray/coding/times.py
    index d923f1ad088..dafa8ca03b1 100644
    --- a/xarray/coding/times.py
    +++ b/xarray/coding/times.py
    @@ -80,8 +80,9 @@ def _decode_cf_datetime_dtype(data, units, calendar, use_cftime):
                 "the default calendar" if calendar is None else "calendar %r" % calendar
             )
             msg = (
    -            "unable to decode time units %r with %s. Try "
    -            "opening your dataset with decode_times=False." % (units, calendar_msg)
    +            f"unable to decode time units {units!r} with {calendar_msg!r}. Try "
    +            "opening your dataset with decode_times=False or installing cftime "
    +            "if it is not installed."
             )
             raise ValueError(msg)
         else:
    
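The two workarounds named in the new error message look like this in practice (the file name is purely illustrative):

    import xarray as xr

    # 1. skip time decoding entirely and keep the raw numeric values
    ds = xr.open_dataset("example.nc", decode_times=False)  # hypothetical file
    print(ds["time"].attrs.get("units"))

    # 2. or install cftime so non-standard calendars/units can be decoded:
    #    python -m pip install cftime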
    From e26b80f3b813d84520eef4d371a2609fd09182e3 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sat, 13 Jun 2020 19:52:45 +0200
    Subject: [PATCH 49/71] built-in accessor documentation (#3988)
    
* add a property-like descriptor that works both on objects and classes
    
    * generate documentation for the plotting accessor methods
    
    * add a docstring to the custom property-like descriptor
    
    * use the accessor syntax in the main plotting section
    
    * explain why we need a custom property class
    
    * rename the custom property to UncachedAccessor
    
    to match the behavior of _CachedAccessor, it also accepts the
    accessor class (not the object). We lose the ability for custom
    docstrings, though.
    
    * declare that __call__ wraps plot
    
    * add accessor tests
    
    * add the autosummary templates from pandas
    
    * update the plotting section to use the accessor templates
    
    * remove the separate callable section
    
    * fix the import order
    
    * add the DataArray.str accessor as a new subsection
    
    * add the datetime accessor to the main api page
    
    * move the plotting functions into the DataArray / Dataset sections
    
    * remove the documentation of the accessor class itself
    
    * manually copy the docstring since functools.wraps does more than that
    
    * also copy the annotations and mark __call__ as wrapping plot
    
    * re-enable __slots__
    
    * update whats-new.rst
    
    Co-authored-by: Deepak Cherian 
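A minimal sketch of the kind of property-like descriptor described above (not the exact implementation added in xarray/core/utils.py; the class names are illustrative):

    class UncachedAccessor:
        # property-like descriptor: class access returns the accessor class
        # (so Sphinx can document it), instance access builds a fresh accessor.
        def __init__(self, accessor):
            self._accessor = accessor

        def __get__(self, obj, cls):
            if obj is None:
                return self._accessor
            return self._accessor(obj)


    class PlotAccessor:
        def __init__(self, obj):
            self._obj = obj


    class DataArrayLike:
        plot = UncachedAccessor(PlotAccessor)


    assert DataArrayLike.plot is PlotAccessor              # works on the class
    assert isinstance(DataArrayLike().plot, PlotAccessor)  # and on instances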
    ---
     doc/_templates/autosummary/accessor.rst       |   6 +
     .../autosummary/accessor_attribute.rst        |   6 +
     .../autosummary/accessor_callable.rst         |   6 +
     .../autosummary/accessor_method.rst           |   6 +
     doc/api.rst                                   | 165 ++++++++++++++++--
     doc/conf.py                                   | 116 ++++++++++++
     doc/whats-new.rst                             |   2 +
     xarray/core/dataarray.py                      |  23 +--
     xarray/core/dataset.py                        |  11 +-
     xarray/core/utils.py                          |  18 ++
     xarray/plot/plot.py                           |   5 +
     xarray/tests/test_plot.py                     |  12 ++
     12 files changed, 327 insertions(+), 49 deletions(-)
     create mode 100644 doc/_templates/autosummary/accessor.rst
     create mode 100644 doc/_templates/autosummary/accessor_attribute.rst
     create mode 100644 doc/_templates/autosummary/accessor_callable.rst
     create mode 100644 doc/_templates/autosummary/accessor_method.rst
    
    diff --git a/doc/_templates/autosummary/accessor.rst b/doc/_templates/autosummary/accessor.rst
    new file mode 100644
    index 00000000000..4ba745cd6fd
    --- /dev/null
    +++ b/doc/_templates/autosummary/accessor.rst
    @@ -0,0 +1,6 @@
    +{{ fullname }}
    +{{ underline }}
    +
    +.. currentmodule:: {{ module.split('.')[0] }}
    +
    +.. autoaccessor:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
    diff --git a/doc/_templates/autosummary/accessor_attribute.rst b/doc/_templates/autosummary/accessor_attribute.rst
    new file mode 100644
    index 00000000000..b5ad65d6a73
    --- /dev/null
    +++ b/doc/_templates/autosummary/accessor_attribute.rst
    @@ -0,0 +1,6 @@
    +{{ fullname }}
    +{{ underline }}
    +
    +.. currentmodule:: {{ module.split('.')[0] }}
    +
    +.. autoaccessorattribute:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
    diff --git a/doc/_templates/autosummary/accessor_callable.rst b/doc/_templates/autosummary/accessor_callable.rst
    new file mode 100644
    index 00000000000..7a3301814f5
    --- /dev/null
    +++ b/doc/_templates/autosummary/accessor_callable.rst
    @@ -0,0 +1,6 @@
    +{{ fullname }}
    +{{ underline }}
    +
    +.. currentmodule:: {{ module.split('.')[0] }}
    +
    +.. autoaccessorcallable:: {{ (module.split('.')[1:] + [objname]) | join('.') }}.__call__
    diff --git a/doc/_templates/autosummary/accessor_method.rst b/doc/_templates/autosummary/accessor_method.rst
    new file mode 100644
    index 00000000000..aefbba6ef1b
    --- /dev/null
    +++ b/doc/_templates/autosummary/accessor_method.rst
    @@ -0,0 +1,6 @@
    +{{ fullname }}
    +{{ underline }}
    +
    +.. currentmodule:: {{ module.split('.')[0] }}
    +
    +.. autoaccessormethod:: {{ (module.split('.')[1:] + [objname]) | join('.') }}
    diff --git a/doc/api.rst b/doc/api.rst
    index 3f25ac1a070..bb0edd0dfa5 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -233,6 +233,15 @@ Reshaping and reorganizing
        Dataset.sortby
        Dataset.broadcast_like
     
    +Plotting
    +--------
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   Dataset.plot.scatter
    +
     DataArray
     =========
     
    @@ -403,6 +412,122 @@ Computation
     :py:attr:`~core.groupby.DataArrayGroupBy.where`
     :py:attr:`~core.groupby.DataArrayGroupBy.quantile`
     
    +
    +String manipulation
    +-------------------
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   DataArray.str.capitalize
    +   DataArray.str.center
    +   DataArray.str.contains
    +   DataArray.str.count
    +   DataArray.str.decode
    +   DataArray.str.encode
    +   DataArray.str.endswith
    +   DataArray.str.find
    +   DataArray.str.get
    +   DataArray.str.index
    +   DataArray.str.isalnum
    +   DataArray.str.isalpha
    +   DataArray.str.isdecimal
    +   DataArray.str.isdigit
    +   DataArray.str.isnumeric
    +   DataArray.str.isspace
    +   DataArray.str.istitle
    +   DataArray.str.isupper
    +   DataArray.str.len
    +   DataArray.str.ljust
    +   DataArray.str.lower
    +   DataArray.str.lstrip
    +   DataArray.str.match
    +   DataArray.str.pad
    +   DataArray.str.repeat
    +   DataArray.str.replace
    +   DataArray.str.rfind
    +   DataArray.str.rindex
    +   DataArray.str.rjust
    +   DataArray.str.rstrip
    +   DataArray.str.slice
    +   DataArray.str.slice_replace
    +   DataArray.str.startswith
    +   DataArray.str.strip
    +   DataArray.str.swapcase
    +   DataArray.str.title
    +   DataArray.str.translate
    +   DataArray.str.upper
    +   DataArray.str.wrap
    +   DataArray.str.zfill
    +
    +Datetimelike properties
    +-----------------------
    +
    +**Datetime properties**:
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_attribute.rst
    +
    +   DataArray.dt.year
    +   DataArray.dt.month
    +   DataArray.dt.day
    +   DataArray.dt.hour
    +   DataArray.dt.minute
    +   DataArray.dt.second
    +   DataArray.dt.microsecond
    +   DataArray.dt.nanosecond
    +   DataArray.dt.weekofyear
    +   DataArray.dt.week
    +   DataArray.dt.dayofweek
    +   DataArray.dt.weekday
    +   DataArray.dt.weekday_name
    +   DataArray.dt.dayofyear
    +   DataArray.dt.quarter
    +   DataArray.dt.days_in_month
    +   DataArray.dt.daysinmonth
    +   DataArray.dt.season
    +   DataArray.dt.time
    +   DataArray.dt.is_month_start
    +   DataArray.dt.is_month_end
    +   DataArray.dt.is_quarter_end
    +   DataArray.dt.is_year_start
    +   DataArray.dt.is_leap_year
    +
    +**Datetime methods**:
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   DataArray.dt.floor
    +   DataArray.dt.ceil
    +   DataArray.dt.round
    +   DataArray.dt.strftime
    +
    +**Timedelta properties**:
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_attribute.rst
    +
    +   DataArray.dt.days
    +   DataArray.dt.seconds
    +   DataArray.dt.microseconds
    +   DataArray.dt.nanoseconds
    +
    +**Timedelta methods**:
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   DataArray.dt.floor
    +   DataArray.dt.ceil
    +   DataArray.dt.round
    +
    +
     Reshaping and reorganizing
     --------------------------
     
    @@ -419,6 +544,27 @@ Reshaping and reorganizing
        DataArray.sortby
        DataArray.broadcast_like
     
    +Plotting
    +--------
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_callable.rst
    +
    +   DataArray.plot
    +
    +.. autosummary::
    +   :toctree: generated/
    +   :template: autosummary/accessor_method.rst
    +
    +   DataArray.plot.contourf
    +   DataArray.plot.contour
    +   DataArray.plot.hist
    +   DataArray.plot.imshow
    +   DataArray.plot.line
    +   DataArray.plot.pcolormesh
    +   DataArray.plot.step
    +
     .. _api.ufuncs:
     
     Universal functions
    @@ -664,25 +810,6 @@ Creating custom indexes
     
        cftime_range
     
    -Plotting
    -========
    -
    -.. autosummary::
    -   :toctree: generated/
    -
    -   Dataset.plot
    -   plot.scatter
    -   DataArray.plot
    -   plot.plot
    -   plot.contourf
    -   plot.contour
    -   plot.hist
    -   plot.imshow
    -   plot.line
    -   plot.pcolormesh
    -   plot.step
    -   plot.FacetGrid
    -
     Faceting
     --------
     .. autosummary::
    diff --git a/doc/conf.py b/doc/conf.py
    index 6b16468d29e..d3d126cb33f 100644
    --- a/doc/conf.py
    +++ b/doc/conf.py
    @@ -20,6 +20,12 @@
     import sys
     from contextlib import suppress
     
    +# --------- autosummary templates ------------------
    +# TODO: eventually replace this with a sphinx.ext.auto_accessor module
    +import sphinx
    +from sphinx.ext.autodoc import AttributeDocumenter, Documenter, MethodDocumenter
    +from sphinx.util import rpartition
    +
     # make sure the source version is preferred (#3567)
     root = pathlib.Path(__file__).absolute().parent.parent
     os.environ["PYTHONPATH"] = str(root)
    @@ -358,3 +364,113 @@
         "dask": ("https://docs.dask.org/en/latest", None),
         "cftime": ("https://unidata.github.io/cftime", None),
     }
    +
    +
    +# --------- autosummary templates ------------------
    +# TODO: eventually replace this with a sphinx.ext.auto_accessor module
    +class AccessorDocumenter(MethodDocumenter):
    +    """
    +    Specialized Documenter subclass for accessors.
    +    """
    +
    +    objtype = "accessor"
    +    directivetype = "method"
    +
    +    # lower than MethodDocumenter so this is not chosen for normal methods
    +    priority = 0.6
    +
    +    def format_signature(self):
    +        # this method gives an error/warning for the accessors, therefore
    +        # overriding it (accessor has no arguments)
    +        return ""
    +
    +
    +class AccessorLevelDocumenter(Documenter):
    +    """
    +    Specialized Documenter subclass for objects on accessor level (methods,
    +    attributes).
    +    """
    +
    +    # This is the simple straightforward version
+    # modname is None, base is the last element (eg 'hour')
    +    # and path the part before (eg 'Series.dt')
    +    # def resolve_name(self, modname, parents, path, base):
    +    #     modname = 'pandas'
    +    #     mod_cls = path.rstrip('.')
    +    #     mod_cls = mod_cls.split('.')
    +    #
    +    #     return modname, mod_cls + [base]
    +
    +    def resolve_name(self, modname, parents, path, base):
    +        if modname is None:
    +            if path:
    +                mod_cls = path.rstrip(".")
    +            else:
    +                mod_cls = None
    +                # if documenting a class-level object without path,
    +                # there must be a current class, either from a parent
    +                # auto directive ...
    +                mod_cls = self.env.temp_data.get("autodoc:class")
    +                # ... or from a class directive
    +                if mod_cls is None:
    +                    mod_cls = self.env.temp_data.get("py:class")
    +                # ... if still None, there's no way to know
    +                if mod_cls is None:
    +                    return None, []
+            # HACK: this is added in comparison to ClassLevelDocumenter
+            # mod_cls still consists of class.accessor, so an extra
+            # rpartition is needed
    +            modname, accessor = rpartition(mod_cls, ".")
    +            modname, cls = rpartition(modname, ".")
    +            parents = [cls, accessor]
    +            # if the module name is still missing, get it like above
    +            if not modname:
    +                modname = self.env.temp_data.get("autodoc:module")
    +            if not modname:
    +                if sphinx.__version__ > "1.3":
    +                    modname = self.env.ref_context.get("py:module")
    +                else:
    +                    modname = self.env.temp_data.get("py:module")
    +            # ... else, it stays None, which means invalid
    +        return modname, parents + [base]
    +
    +
    +class AccessorAttributeDocumenter(AccessorLevelDocumenter, AttributeDocumenter):
    +
    +    objtype = "accessorattribute"
    +    directivetype = "attribute"
    +
    +    # lower than AttributeDocumenter so this is not chosen for normal attributes
    +    priority = 0.6
    +
    +
    +class AccessorMethodDocumenter(AccessorLevelDocumenter, MethodDocumenter):
    +
    +    objtype = "accessormethod"
    +    directivetype = "method"
    +
    +    # lower than MethodDocumenter so this is not chosen for normal methods
    +    priority = 0.6
    +
    +
    +class AccessorCallableDocumenter(AccessorLevelDocumenter, MethodDocumenter):
    +    """
+    This documenter lets us remove .__call__ from the method signature for
+    callable accessors like Series.plot
    +    """
    +
    +    objtype = "accessorcallable"
    +    directivetype = "method"
    +
    +    # lower than MethodDocumenter; otherwise the doc build prints warnings
    +    priority = 0.5
    +
    +    def format_name(self):
    +        return MethodDocumenter.format_name(self).rstrip(".__call__")
    +
    +
    +def setup(app):
    +    app.add_autodocumenter(AccessorDocumenter)
    +    app.add_autodocumenter(AccessorAttributeDocumenter)
    +    app.add_autodocumenter(AccessorMethodDocumenter)
    +    app.add_autodocumenter(AccessorCallableDocumenter)
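
The subtle piece above is ``resolve_name``: for an accessor member the autodoc class context holds ``"<Class>.<accessor>"`` instead of a plain class name, hence the extra ``rpartition``. A standalone sketch of that splitting, using ``str.rpartition`` in place of ``sphinx.util.rpartition`` and assumed context values for a ``DataArray.str.upper`` entry:

```python
# Illustration only: how AccessorLevelDocumenter.resolve_name splits the path.
mod_cls = "DataArray.str"   # assumed value of the autodoc class context
base = "upper"              # the member being documented

head, _, accessor = mod_cls.rpartition(".")   # -> "DataArray", "str"
modname, _, cls = head.rpartition(".")        # -> "", "DataArray"
if not modname:
    modname = "xarray"      # real code falls back to the module context here

print(modname, [cls, accessor, base])
# -> xarray ['DataArray', 'str', 'upper']
```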
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 68b2d738073..dade282d49a 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -174,6 +174,8 @@ Documentation
       By `Justus Magin `_.
     - Narrative documentation now describes :py:meth:`map_blocks`: :ref:`dask.automatic-parallelization`.
       By `Deepak Cherian `_.
    +- Document ``.plot``, ``.dt``, ``.str`` accessors the way they are called. (:issue:`3625`, :pull:`3988`)
    +  By `Justus Magin `_.
     - Add documentation for the parameters and return values of :py:meth:`DataArray.sel`.
       By `Justus Magin `_.
     
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index 44773e36e30..5814c828663 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -260,7 +260,7 @@ class DataArray(AbstractArray, DataWithCoords):
         _resample_cls = resample.DataArrayResample
         _weighted_cls = weighted.DataArrayWeighted
     
    -    dt = property(CombinedDatetimelikeAccessor)
    +    dt = utils.UncachedAccessor(CombinedDatetimelikeAccessor)
     
         def __init__(
             self,
    @@ -2722,24 +2722,7 @@ def func(self, other):
         def _copy_attrs_from(self, other: Union["DataArray", Dataset, Variable]) -> None:
             self.attrs = other.attrs
     
    -    @property
    -    def plot(self) -> _PlotMethods:
    -        """
    -        Access plotting functions for DataArray's
    -
    -        >>> d = xr.DataArray([[1, 2], [3, 4]])
    -
    -        For convenience just call this directly
    -
    -        >>> d.plot()
    -
    -        Or use it as a namespace to use xarray.plot functions as
    -        DataArray methods
    -
    -        >>> d.plot.imshow()  # equivalent to xarray.plot.imshow(d)
    -
    -        """
    -        return _PlotMethods(self)
    +    plot = utils.UncachedAccessor(_PlotMethods)
     
         def _title_for_slice(self, truncate: int = 50) -> str:
             """
    @@ -3831,7 +3814,7 @@ def idxmax(
     
         # this needs to be at the end, or mypy will confuse with `str`
         # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
    -    str = property(StringAccessor)
    +    str = utils.UncachedAccessor(StringAccessor)
     
     
 # priority must be higher than Variable to properly work with binary ufuncs
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index a8011afd3e3..2958cad89b2 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -5563,16 +5563,7 @@ def real(self):
         def imag(self):
             return self._unary_op(lambda x: x.imag, keep_attrs=True)(self)
     
    -    @property
    -    def plot(self):
    -        """
    -        Access plotting functions for Datasets.
    -        Use it as a namespace to use xarray.plot functions as Dataset methods
    -
    -        >>> ds.plot.scatter(...)  # equivalent to xarray.plot.scatter(ds,...)
    -
    -        """
    -        return _Dataset_PlotMethods(self)
    +    plot = utils.UncachedAccessor(_Dataset_PlotMethods)
     
         def filter_by_attrs(self, **kwargs):
             """Returns a ``Dataset`` with variables that match specific conditions.
    diff --git a/xarray/core/utils.py b/xarray/core/utils.py
    index 1126cf3037f..0542f850b02 100644
    --- a/xarray/core/utils.py
    +++ b/xarray/core/utils.py
    @@ -787,6 +787,24 @@ def drop_dims_from_indexers(
             )
     
     
    +class UncachedAccessor:
    +    """ Acts like a property, but on both classes and class instances
    +
    +    This class is necessary because some tools (e.g. pydoc and sphinx)
    +    inspect classes for which property returns itself and not the
    +    accessor.
    +    """
    +
    +    def __init__(self, accessor):
    +        self._accessor = accessor
    +
    +    def __get__(self, obj, cls):
    +        if obj is None:
    +            return self._accessor
    +
    +        return self._accessor(obj)
    +
    +
     # Singleton type, as per https://github.com/python/typing/pull/240
     class Default(Enum):
         token = 0
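
``UncachedAccessor`` is the piece that makes the documentation machinery above work: accessed on the class it hands back the accessor class itself (which is what pydoc and the new sphinx documenters inspect), while access through an instance builds a fresh accessor wrapping that instance, just as the old ``property`` did. A minimal self-contained sketch of the same behaviour; ``_DemoAccessor`` and ``Host`` are made up for illustration:

```python
class UncachedAccessor:
    """Return the accessor class on class access, a new accessor on instance access."""

    def __init__(self, accessor):
        self._accessor = accessor

    def __get__(self, obj, cls):
        if obj is None:               # class-level access, e.g. Dataset.plot
            return self._accessor
        return self._accessor(obj)    # instance-level access: uncached wrapper


class _DemoAccessor:
    def __init__(self, obj):
        self._obj = obj

    def shout(self):
        return self._obj.name.upper()


class Host:                           # hypothetical host class
    demo = UncachedAccessor(_DemoAccessor)

    def __init__(self, name):
        self.name = name


assert Host.demo is _DemoAccessor            # what pydoc/sphinx now see
assert Host("time").demo.shout() == "TIME"   # behaves like the old property
```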
    diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py
    index 19a3f1e63e3..e4a981daf8c 100644
    --- a/xarray/plot/plot.py
    +++ b/xarray/plot/plot.py
    @@ -445,6 +445,11 @@ def __init__(self, darray):
         def __call__(self, **kwargs):
             return plot(self._da, **kwargs)
     
    +    # we can't use functools.wraps here since that also modifies the name / qualname
    +    __doc__ = __call__.__doc__ = plot.__doc__
    +    __call__.__wrapped__ = plot  # type: ignore
    +    __call__.__annotations__ = plot.__annotations__
    +
         @functools.wraps(hist)
         def hist(self, ax=None, **kwargs):
             return hist(self._da, ax=ax, **kwargs)
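
The manual copying above is deliberate: ``functools.wraps`` would also overwrite ``__name__`` and ``__qualname__`` of ``__call__``, so only ``__doc__``, ``__wrapped__`` and ``__annotations__`` are transferred. A toy comparison of the two approaches; the ``plot`` function here is a stand-in, not the real ``xarray.plot.plot``:

```python
import functools


def plot(darray, **kwargs):
    """Toy stand-in for xarray.plot.plot."""


class _PlotMethods:
    def __call__(self, **kwargs):
        return plot(self, **kwargs)

    # copying by hand keeps __call__.__name__ / __qualname__ intact
    __doc__ = __call__.__doc__ = plot.__doc__
    __call__.__wrapped__ = plot
    __call__.__annotations__ = plot.__annotations__


@functools.wraps(plot)
def wrapped(darray, **kwargs):
    return plot(darray, **kwargs)


print(_PlotMethods.__call__.__name__)        # '__call__'  (kept)
print(wrapped.__name__)                      # 'plot'      (renamed by wraps)
print(_PlotMethods.__doc__ == plot.__doc__)  # True
```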
    diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
    index 6497987e813..c26d105a713 100644
    --- a/xarray/tests/test_plot.py
    +++ b/xarray/tests/test_plot.py
    @@ -111,6 +111,12 @@ class TestPlot(PlotTestCase):
         def setup_array(self):
             self.darray = DataArray(easy_array((2, 3, 4)))
     
    +    def test_accessor(self):
    +        from ..plot.plot import _PlotMethods
    +
    +        assert DataArray.plot is _PlotMethods
    +        assert isinstance(self.darray.plot, _PlotMethods)
    +
         def test_label_from_attrs(self):
             da = self.darray.copy()
             assert "" == label_from_attrs(da)
    @@ -2098,6 +2104,12 @@ def setUp(self):
             ds.B.attrs["units"] = "Bunits"
             self.ds = ds
     
    +    def test_accessor(self):
    +        from ..plot.dataset_plot import _Dataset_PlotMethods
    +
    +        assert Dataset.plot is _Dataset_PlotMethods
    +        assert isinstance(self.ds.plot, _Dataset_PlotMethods)
    +
         @pytest.mark.parametrize(
             "add_guide, hue_style, legend, colorbar",
             [
    
    From 2ba530026fb273a2882869a6e09ede053a0f081b Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sat, 13 Jun 2020 19:53:02 +0200
Subject: [PATCH 50/71] provide an error summary for assert_allclose (#3847)
    
    * allow passing a callable as compat to diff_{dataset,array}_repr
    
    * rewrite assert_allclose to provide a failure summary
    
    * make sure we're comparing variables
    
    * remove spurious comments
    
    * override test_aggregate_complex with a test compatible with pint
    
    * expect the asserts to raise
    
    * xfail the tests failing due to isclose not accepting non-quantity tolerances
    
    * mark top-level function tests as xfailing if they use assert_allclose
    
    * mark test_1d_math as runnable but xfail it
    
    * bump dask and distributed
    
    * entry to whats-new.rst
    
    * attempt to fix the failing py36-min-all-deps and py36-min-nep18 CI
    
    * conditionally xfail tests using assert_allclose with pint < 0.12
    
    * xfail more tests depending on which pint version is used
    
    * try using numpy.testing.assert_allclose instead
    
    * try computing if the dask version is too old and dask.array[bool]
    
    * fix the dask version checking
    
* convert all dask arrays to numpy when using an insufficient dask version
    ---
     ci/requirements/py36-min-all-deps.yml |  4 +-
     ci/requirements/py36-min-nep18.yml    |  4 +-
     doc/whats-new.rst                     |  2 +
     xarray/core/duck_array_ops.py         | 20 +++++++++
     xarray/core/formatting.py             | 16 ++++++-
     xarray/testing.py                     | 43 +++++++++++--------
     xarray/tests/test_duck_array_ops.py   |  2 +-
     xarray/tests/test_testing.py          | 25 +++++++++++
     xarray/tests/test_units.py            | 62 ++++++++++++++++++++++++++-
     9 files changed, 150 insertions(+), 28 deletions(-)
    
    diff --git a/ci/requirements/py36-min-all-deps.yml b/ci/requirements/py36-min-all-deps.yml
    index 86540197dcc..a72cd000680 100644
    --- a/ci/requirements/py36-min-all-deps.yml
    +++ b/ci/requirements/py36-min-all-deps.yml
    @@ -15,8 +15,8 @@ dependencies:
       - cfgrib=0.9
       - cftime=1.0
       - coveralls
    -  - dask=2.2
    -  - distributed=2.2
    +  - dask=2.5
    +  - distributed=2.5
       - flake8
       - h5netcdf=0.7
       - h5py=2.9  # Policy allows for 2.10, but it's a conflict-fest
    diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
    index a5eded49cd4..a2245e89b41 100644
    --- a/ci/requirements/py36-min-nep18.yml
    +++ b/ci/requirements/py36-min-nep18.yml
    @@ -6,8 +6,8 @@ dependencies:
       # require drastically newer packages than everything else
       - python=3.6
       - coveralls
    -  - dask=2.4
    -  - distributed=2.4
    +  - dask=2.5
    +  - distributed=2.5
       - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
       - numpy=1.17
       - pandas=0.25
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index dade282d49a..bcff60ce4df 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -252,6 +252,8 @@ New Features
       :py:meth:`core.groupby.DatasetGroupBy.quantile`, :py:meth:`core.groupby.DataArrayGroupBy.quantile`
       (:issue:`3843`, :pull:`3844`)
       By `Aaron Spring `_.
    +- Add a diff summary for `testing.assert_allclose`. (:issue:`3617`, :pull:`3847`)
    +  By `Justus Magin `_.
     
     Bug fixes
     ~~~~~~~~~
    diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
    index 1340b456cf2..76719699168 100644
    --- a/xarray/core/duck_array_ops.py
    +++ b/xarray/core/duck_array_ops.py
    @@ -6,6 +6,7 @@
     import contextlib
     import inspect
     import warnings
    +from distutils.version import LooseVersion
     from functools import partial
     
     import numpy as np
    @@ -20,6 +21,14 @@
     except ImportError:
         dask_array = None  # type: ignore
     
    +# TODO: remove after we stop supporting dask < 2.9.1
    +try:
    +    import dask
    +
    +    dask_version = dask.__version__
    +except ImportError:
    +    dask_version = None
    +
     
     def _dask_or_eager_func(
         name,
    @@ -199,8 +208,19 @@ def allclose_or_equiv(arr1, arr2, rtol=1e-5, atol=1e-8):
         """
         arr1 = asarray(arr1)
         arr2 = asarray(arr2)
    +
         lazy_equiv = lazy_array_equiv(arr1, arr2)
         if lazy_equiv is None:
    +        # TODO: remove after we require dask >= 2.9.1
    +        sufficient_dask_version = (
    +            dask_version is not None and LooseVersion(dask_version) >= "2.9.1"
    +        )
    +        if not sufficient_dask_version and any(
    +            isinstance(arr, dask_array_type) for arr in [arr1, arr2]
    +        ):
    +            arr1 = np.array(arr1)
    +            arr2 = np.array(arr2)
    +
             return bool(isclose(arr1, arr2, rtol=rtol, atol=atol, equal_nan=True).all())
         else:
             return lazy_equiv
    diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
    index d6732fc182e..bd9576a4440 100644
    --- a/xarray/core/formatting.py
    +++ b/xarray/core/formatting.py
    @@ -539,7 +539,10 @@ def extra_items_repr(extra_keys, mapping, ab_side):
         for k in a_keys & b_keys:
             try:
                 # compare xarray variable
    -            compatible = getattr(a_mapping[k], compat)(b_mapping[k])
    +            if not callable(compat):
    +                compatible = getattr(a_mapping[k], compat)(b_mapping[k])
    +            else:
    +                compatible = compat(a_mapping[k], b_mapping[k])
                 is_variable = True
             except AttributeError:
                 # compare attribute value
    @@ -596,8 +599,13 @@ def extra_items_repr(extra_keys, mapping, ab_side):
     
     
     def _compat_to_str(compat):
    +    if callable(compat):
    +        compat = compat.__name__
    +
         if compat == "equals":
             return "equal"
    +    elif compat == "allclose":
    +        return "close"
         else:
             return compat
     
    @@ -611,8 +619,12 @@ def diff_array_repr(a, b, compat):
         ]
     
         summary.append(diff_dim_summary(a, b))
    +    if callable(compat):
    +        equiv = compat
    +    else:
    +        equiv = array_equiv
     
    -    if not array_equiv(a.data, b.data):
    +    if not equiv(a.data, b.data):
             temp = [wrap_indent(short_numpy_repr(obj), start="    ") for obj in (a, b)]
             diff_data_repr = [
                 ab_side + "\n" + ab_data_repr
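
With this change the diff-repr helpers accept either the name of a comparison method (as before) or a callable applied to the two objects, which is what lets the rewritten ``assert_allclose`` below reuse them. A rough usage sketch; ``diff_array_repr`` is internal API and its exact output text is not reproduced here:

```python
import numpy as np
import xarray as xr
from xarray.core import formatting

a = xr.DataArray([1.0, 2.0], dims="x", name="a")
b = xr.DataArray([1.0, 2.5], dims="x", name="a")

# established usage: compat is the name of a comparison method
print(formatting.diff_array_repr(a, b, compat="equals"))


# new with this patch: compat may also be a callable on the underlying data
def allclose(x, y):
    return np.allclose(x, y)


print(formatting.diff_array_repr(a, b, compat=allclose))
```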
    diff --git a/xarray/testing.py b/xarray/testing.py
    index e7bf5f9221a..9681503414e 100644
    --- a/xarray/testing.py
    +++ b/xarray/testing.py
    @@ -1,10 +1,11 @@
     """Testing functions exposed to the user API"""
    +import functools
     from typing import Hashable, Set, Union
     
     import numpy as np
     import pandas as pd
     
    -from xarray.core import duck_array_ops, formatting
    +from xarray.core import duck_array_ops, formatting, utils
     from xarray.core.dataarray import DataArray
     from xarray.core.dataset import Dataset
     from xarray.core.indexes import default_indexes
    @@ -118,27 +119,31 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
         """
         __tracebackhide__ = True
         assert type(a) == type(b)
    -    kwargs = dict(rtol=rtol, atol=atol, decode_bytes=decode_bytes)
    +
    +    equiv = functools.partial(
    +        _data_allclose_or_equiv, rtol=rtol, atol=atol, decode_bytes=decode_bytes
    +    )
    +    equiv.__name__ = "allclose"
    +
    +    def compat_variable(a, b):
    +        a = getattr(a, "variable", a)
    +        b = getattr(b, "variable", b)
    +
    +        return a.dims == b.dims and (a._data is b._data or equiv(a.data, b.data))
    +
         if isinstance(a, Variable):
    -        assert a.dims == b.dims
    -        allclose = _data_allclose_or_equiv(a.values, b.values, **kwargs)
    -        assert allclose, f"{a.values}\n{b.values}"
    +        allclose = compat_variable(a, b)
    +        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
         elif isinstance(a, DataArray):
    -        assert_allclose(a.variable, b.variable, **kwargs)
    -        assert set(a.coords) == set(b.coords)
    -        for v in a.coords.variables:
    -            # can't recurse with this function as coord is sometimes a
    -            # DataArray, so call into _data_allclose_or_equiv directly
    -            allclose = _data_allclose_or_equiv(
    -                a.coords[v].values, b.coords[v].values, **kwargs
    -            )
    -            assert allclose, "{}\n{}".format(a.coords[v].values, b.coords[v].values)
    +        allclose = utils.dict_equiv(
    +            a.coords, b.coords, compat=compat_variable
    +        ) and compat_variable(a.variable, b.variable)
    +        assert allclose, formatting.diff_array_repr(a, b, compat=equiv)
         elif isinstance(a, Dataset):
    -        assert set(a.data_vars) == set(b.data_vars)
    -        assert set(a.coords) == set(b.coords)
    -        for k in list(a.variables) + list(a.coords):
    -            assert_allclose(a[k], b[k], **kwargs)
    -
    +        allclose = a._coord_names == b._coord_names and utils.dict_equiv(
    +            a.variables, b.variables, compat=compat_variable
    +        )
    +        assert allclose, formatting.diff_dataset_repr(a, b, compat=equiv)
         else:
             raise TypeError("{} not supported by assertion comparison".format(type(a)))
     
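
The user-visible effect of the rewrite is that a failing ``assert_allclose`` now raises with the diff-style summary built by ``diff_array_repr`` / ``diff_dataset_repr`` (headed roughly by "Left and right ... objects are not close") instead of dumping both ``.values`` arrays. A small usage sketch mirroring the new test case added below:

```python
import xarray as xr

a = xr.DataArray([1e-17, 2], dims="x")
b = xr.DataArray([0, 3], dims="x")

try:
    xr.testing.assert_allclose(a, b)
except AssertionError as e:
    # with this patch the message is a structured diff summary rather than
    # the raw repr of both .values arrays
    print(e)
```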
    diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py
    index e61881cfce3..feedcd27164 100644
    --- a/xarray/tests/test_duck_array_ops.py
    +++ b/xarray/tests/test_duck_array_ops.py
    @@ -384,7 +384,7 @@ def test_reduce(dim_num, dtype, dask, func, skipna, aggdim):
     
                     actual = getattr(da, func)(skipna=skipna, dim=aggdim)
                     assert_dask_array(actual, dask)
    -                assert np.allclose(
    +                np.testing.assert_allclose(
                         actual.values, np.array(expected), rtol=1.0e-4, equal_nan=True
                     )
                 except (TypeError, AttributeError, ZeroDivisionError):
    diff --git a/xarray/tests/test_testing.py b/xarray/tests/test_testing.py
    index 041b7341ade..f4961af58e9 100644
    --- a/xarray/tests/test_testing.py
    +++ b/xarray/tests/test_testing.py
    @@ -1,3 +1,5 @@
    +import pytest
    +
     import xarray as xr
     
     
    @@ -5,3 +7,26 @@ def test_allclose_regression():
         x = xr.DataArray(1.01)
         y = xr.DataArray(1.02)
         xr.testing.assert_allclose(x, y, atol=0.01)
    +
    +
    +@pytest.mark.parametrize(
    +    "obj1,obj2",
    +    (
    +        pytest.param(
    +            xr.Variable("x", [1e-17, 2]), xr.Variable("x", [0, 3]), id="Variable",
    +        ),
    +        pytest.param(
    +            xr.DataArray([1e-17, 2], dims="x"),
    +            xr.DataArray([0, 3], dims="x"),
    +            id="DataArray",
    +        ),
    +        pytest.param(
    +            xr.Dataset({"a": ("x", [1e-17, 2]), "b": ("y", [-2e-18, 2])}),
    +            xr.Dataset({"a": ("x", [0, 2]), "b": ("y", [0, 1])}),
    +            id="Dataset",
    +        ),
    +    ),
    +)
    +def test_assert_allclose(obj1, obj2):
    +    with pytest.raises(AssertionError):
    +        xr.testing.assert_allclose(obj1, obj2)
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index 5dd4a42cff0..6f4f9f768d9 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -425,6 +425,10 @@ def test_apply_ufunc_dataset(dtype):
         assert_identical(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -512,6 +516,10 @@ def test_align_dataarray(fill_value, variant, unit, error, dtype):
         assert_allclose(expected_b, actual_b)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -929,6 +937,10 @@ def test_concat_dataset(variant, unit, error, dtype):
         assert_identical(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1036,6 +1048,10 @@ def test_merge_dataarray(variant, unit, error, dtype):
         assert_allclose(expected, actual)
     
     
    +# TODO: remove once pint==0.12 has been released
    +@pytest.mark.xfail(
    +    LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +)
     @pytest.mark.parametrize(
         "unit,error",
         (
    @@ -1385,7 +1401,6 @@ def wrapper(cls):
         "test_datetime64_conversion",
         "test_timedelta64_conversion",
         "test_pandas_period_index",
    -    "test_1d_math",
         "test_1d_reduce",
         "test_array_interface",
         "test___array__",
    @@ -1413,6 +1428,13 @@ def example_1d_objects(self):
             ]:
                 yield (self.cls("x", data), data)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
    +    def test_real_and_imag(self):
    +        super().test_real_and_imag()
    +
         @pytest.mark.parametrize(
             "func",
             (
    @@ -1450,6 +1472,22 @@ def test_aggregation(self, func, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
    +    def test_aggregate_complex(self):
    +        variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m)
    +        expected = xr.Variable((), (0.5 + 1j) * unit_registry.m)
    +        actual = variable.mean()
    +
    +        assert_units_equal(expected, actual)
    +        xr.testing.assert_allclose(expected, actual)
    +
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -1748,6 +1786,10 @@ def test_isel(self, indices, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "unit,error",
             (
    @@ -2224,6 +2266,10 @@ def test_repr(self, func, variant, dtype):
             # warnings or errors, but does not check the result
             func(data_array)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose",
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    @@ -2235,7 +2281,7 @@ def test_repr(self, func, variant, dtype):
                 function("mean"),
                 pytest.param(
                     function("median"),
    -                marks=pytest.mark.xfail(
    +                marks=pytest.mark.skip(
                         reason="median does not work with dataarrays yet"
                     ),
                 ),
    @@ -3283,6 +3329,10 @@ def test_head_tail_thin(self, func, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
             "func",
    @@ -3356,6 +3406,10 @@ def test_interp_reindex_indexing(self, func, unit, error, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize("variant", ("data", "coords"))
         @pytest.mark.parametrize(
             "func",
    @@ -3558,6 +3612,10 @@ def test_computation(self, func, dtype):
             assert_units_equal(expected, actual)
             xr.testing.assert_identical(expected, actual)
     
    +    # TODO: remove once pint==0.12 has been released
    +    @pytest.mark.xfail(
    +        LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose"
    +    )
         @pytest.mark.parametrize(
             "func",
             (
    
    From bc5c79e5f79d8d7fbb1ed593a5413028a1bdfb36 Mon Sep 17 00:00:00 2001
    From: Noah D Brenowitz 
    Date: Mon, 15 Jun 2020 04:25:52 -0700
    Subject: [PATCH 51/71] Improve typehints of xr.Dataset.__getitem__ (#4144)
    
    * Improve typehints of xr.Dataset.__getitem__
    
    Resolves #4125
    
    * Add overload for Mapping behavior
    
    Sadly this is not working with my version of mypy. See https://github.com/python/mypy/issues/7328
    
    * Overload only Hashable inputs
    
    Given mypy's use of overloads, I think this is all we can do. If the argument is not Hashable, then return the Union type as before.
    
    * Lint
    
    * Quote the DataArray to avoid error in py3.6
    
    * Code review
    
    Co-authored-by: crusaderky 
    ---
     .pre-commit-config.yaml  |  2 +-
     ci/requirements/py38.yml |  2 +-
     xarray/core/dataset.py   | 17 +++++++++++++++--
     xarray/core/weighted.py  |  6 +++---
     4 files changed, 20 insertions(+), 7 deletions(-)
    
    diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
    index 26bf4803ef6..1d384e58a3c 100644
    --- a/.pre-commit-config.yaml
    +++ b/.pre-commit-config.yaml
    @@ -16,7 +16,7 @@ repos:
         hooks:
           - id: flake8
       - repo: https://github.com/pre-commit/mirrors-mypy
    -    rev: v0.761  # Must match ci/requirements/*.yml
    +    rev: v0.780  # Must match ci/requirements/*.yml
         hooks:
           - id: mypy
       # run this occasionally, ref discussion https://github.com/pydata/xarray/pull/3194
    diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml
    index 24602f884e9..7dff3a1bd97 100644
    --- a/ci/requirements/py38.yml
    +++ b/ci/requirements/py38.yml
    @@ -22,7 +22,7 @@ dependencies:
       - isort
       - lxml    # Optional dep of pydap
       - matplotlib
    -  - mypy=0.761  # Must match .pre-commit-config.yaml
    +  - mypy=0.780  # Must match .pre-commit-config.yaml
       - nc-time-axis
       - netcdf4
       - numba
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index 2958cad89b2..a024324bcb1 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -27,6 +27,7 @@
         TypeVar,
         Union,
         cast,
    +    overload,
     )
     
     import numpy as np
    @@ -1241,13 +1242,25 @@ def loc(self) -> _LocIndexer:
             """
             return _LocIndexer(self)
     
    -    def __getitem__(self, key: Any) -> "Union[DataArray, Dataset]":
    +    # FIXME https://github.com/python/mypy/issues/7328
    +    @overload
    +    def __getitem__(self, key: Mapping) -> "Dataset":  # type: ignore
    +        ...
    +
    +    @overload
    +    def __getitem__(self, key: Hashable) -> "DataArray":  # type: ignore
    +        ...
    +
    +    @overload
    +    def __getitem__(self, key: Any) -> "Dataset":
    +        ...
    +
    +    def __getitem__(self, key):
             """Access variables or coordinates this dataset as a
             :py:class:`~xarray.DataArray`.
     
             Indexing with a list of names will return a new ``Dataset`` object.
             """
    -        # TODO(shoyer): type this properly: https://github.com/python/mypy/issues/7328
             if utils.is_dict_like(key):
                 return self.isel(**cast(Mapping, key))
     
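
For mypy users the net effect is that the common single-name lookup is now typed as returning a ``DataArray``, while dict-like keys and anything non-hashable (such as a list of names) fall back to the ``Dataset`` overloads. A short illustration; the variable names and data are made up:

```python
import xarray as xr

ds = xr.Dataset({"a": ("x", [1, 2, 3]), "b": ("x", [4, 5, 6])})

da = ds["a"]          # hashable key: typed (and returned) as a DataArray
sub = ds[["a", "b"]]  # list of names: handled by the Dataset overload

assert isinstance(da, xr.DataArray)
assert isinstance(sub, xr.Dataset)
```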
    diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py
    index 21ed06ea85f..fa143342c06 100644
    --- a/xarray/core/weighted.py
    +++ b/xarray/core/weighted.py
    @@ -72,11 +72,11 @@ class Weighted:
         def __init__(self, obj: "DataArray", weights: "DataArray") -> None:
             ...
     
    -    @overload  # noqa: F811
    -    def __init__(self, obj: "Dataset", weights: "DataArray") -> None:  # noqa: F811
    +    @overload
    +    def __init__(self, obj: "Dataset", weights: "DataArray") -> None:
             ...
     
    -    def __init__(self, obj, weights):  # noqa: F811
    +    def __init__(self, obj, weights):
             """
             Create a Weighted object
     
    
    From 6f272b5693913d4e6a989cbf2e8b18d02a71cb4c Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Mon, 15 Jun 2020 23:35:43 -0700
    Subject: [PATCH 52/71] Fix failing upstream-dev build & remove docs build
     (#4160)
    
We'll use RTD's new doc builder instead. For an example, click on
"docs/readthedocs.org:xray" below or look at GH4159.
    ---
     azure-pipelines.yml  | 18 ------------------
     ci/azure/install.yml |  5 ++++-
     2 files changed, 4 insertions(+), 19 deletions(-)
    
    diff --git a/azure-pipelines.yml b/azure-pipelines.yml
    index ff85501c555..e04c8f74f68 100644
    --- a/azure-pipelines.yml
    +++ b/azure-pipelines.yml
    @@ -108,21 +108,3 @@ jobs:
           python ci/min_deps_check.py ci/requirements/py36-bare-minimum.yml
           python ci/min_deps_check.py ci/requirements/py36-min-all-deps.yml
         displayName: minimum versions policy
    -
    -- job: Docs
    -  pool:
    -    vmImage: 'ubuntu-16.04'
    -  steps:
    -  - template: ci/azure/install.yml
    -    parameters:
    -      env_file: ci/requirements/doc.yml
    -  - bash: |
    -      source activate xarray-tests
    -      # Replicate the exact environment created by the readthedocs CI
    -      conda install --yes --quiet -c pkgs/main mock pillow sphinx sphinx_rtd_theme
    -    displayName: Replicate readthedocs CI environment
    -  - bash: |
    -      source activate xarray-tests
    -      cd doc
    -      sphinx-build -W --keep-going -j auto -b html -d _build/doctrees . _build/html
    -    displayName: Build HTML docs
    diff --git a/ci/azure/install.yml b/ci/azure/install.yml
    index eff229e863a..83895eebe01 100644
    --- a/ci/azure/install.yml
    +++ b/ci/azure/install.yml
    @@ -10,6 +10,8 @@ steps:
         conda env create -n xarray-tests --file ${{ parameters.env_file }}
       displayName: Install conda dependencies
     
    +# TODO: add sparse back in, once Numba works with the development version of
    +# NumPy again: https://github.com/pydata/xarray/issues/4146 
     - bash: |
         source activate xarray-tests
         conda uninstall -y --force \
    @@ -23,7 +25,8 @@ steps:
             cftime \
             rasterio \
             pint \
    -        bottleneck
    +        bottleneck \
    +        sparse
         python -m pip install \
             -i https://pypi.anaconda.org/scipy-wheels-nightly/simple \
             --no-deps \
    
    From 52bb0a22ed25195a1b47b693f1881c90f15983e6 Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Wed, 17 Jun 2020 09:50:56 -0700
    Subject: [PATCH 53/71] Update issue templates inspired/based on dask (#4154)
    
    * Update issue templates based on dask
    
    * add config.yml for issue template
    ---
     .github/ISSUE_TEMPLATE/bug-report.md      | 39 +++++++++++++++++++++++
     .github/ISSUE_TEMPLATE/bug_report.md      | 35 --------------------
     .github/ISSUE_TEMPLATE/config.yml         |  5 +++
     .github/ISSUE_TEMPLATE/feature-request.md | 22 +++++++++++++
     4 files changed, 66 insertions(+), 35 deletions(-)
     create mode 100644 .github/ISSUE_TEMPLATE/bug-report.md
     delete mode 100644 .github/ISSUE_TEMPLATE/bug_report.md
     create mode 100644 .github/ISSUE_TEMPLATE/config.yml
     create mode 100644 .github/ISSUE_TEMPLATE/feature-request.md
    
    diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md
    new file mode 100644
    index 00000000000..02bc5d0f7b0
    --- /dev/null
    +++ b/.github/ISSUE_TEMPLATE/bug-report.md
    @@ -0,0 +1,39 @@
    +---
    +name: Bug report
    +about: Create a report to help us improve
    +title: ''
    +labels: ''
    +assignees: ''
    +
    +---
    +
    +
    +
    +**What happened**:
    +
    +**What you expected to happen**:
    +
    +**Minimal Complete Verifiable Example**:
    +
    +```python
    +# Put your MCVE code here
    +```
    +
    +**Anything else we need to know?**:
    +
    +**Environment**:
    +
    +
+<details><summary>Output of xr.show_versions()</summary>
+
+
+
+
+</details>
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index c712cf27979..00000000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,35 +0,0 @@
----
-name: Bug report / Feature request
-about: 'Post a problem or idea'
-title: ''
-labels: ''
-assignees: ''
-
----
-
-
-
-
-#### MCVE Code Sample
-
-
-```python
-# Your code here
-
-```
-
-#### Expected Output
-
-
-#### Problem Description
-
-
-
-#### Versions
-
-<details><summary>Output of xr.show_versions()</summary>
-
-
-
-
-</details>
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 00000000000..3389fbfe071
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,5 @@
+blank_issues_enabled: true
+contact_links:
+  - name: General Question
+    url: https://stackoverflow.com/questions/tagged/python-xarray
+    about: "If you have a question like *How do I append to an xarray.Dataset?* then please ask on Stack Overflow using the #python-xarray tag."
diff --git a/.github/ISSUE_TEMPLATE/feature-request.md b/.github/ISSUE_TEMPLATE/feature-request.md
new file mode 100644
index 00000000000..7021fe490aa
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/feature-request.md
@@ -0,0 +1,22 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context about the feature request here.

From ad0a76bbf4a88ab03d495ba74423b0daa08d75ec Mon Sep 17 00:00:00 2001
From: Ray Bell 
Date: Wed, 17 Jun 2020 12:52:29 -0400
Subject: [PATCH 54/71] drop eccodes in docs (#4162)

Co-authored-by: Ray Bell 
---
 doc/io.rst | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/doc/io.rst b/doc/io.rst
index 1f854586202..4aac5e0b6f7 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -994,8 +994,8 @@ be done directly from zarr, as described in the
 GRIB format via cfgrib
 ----------------------
 
-xarray supports reading GRIB files via ECMWF cfgrib_ python driver and ecCodes_
-C-library, if they are installed. To open a GRIB file supply ``engine='cfgrib'``
+xarray supports reading GRIB files via ECMWF cfgrib_ python driver,
+if it is installed. To open a GRIB file supply ``engine='cfgrib'``
 to :py:func:`open_dataset`:
 
 .. ipython::
 
     In [1]: ds_grib = xr.open_dataset("example.grib", engine="cfgrib")
 
-We recommend installing ecCodes via conda::
+We recommend installing cfgrib via conda::
 
-    conda install -c conda-forge eccodes
-    pip install cfgrib
+    conda install -c conda-forge cfgrib
 
 .. _cfgrib: https://github.com/ecmwf/cfgrib
-.. _ecCodes: https://confluence.ecmwf.int/display/ECC/ecCodes+Home
 
 .. _io.pynio:

From 66e77309ad48a5ad0dbe774c5500bb52775b9372 Mon Sep 17 00:00:00 2001
From: keewis 
Date: Wed, 17 Jun 2020 22:40:07 +0200
Subject: [PATCH 55/71] pint support for Dataset (#3975)

* remove the xfail marks from all aggregations except prod and np.median
* rewrite the aggregation tests
* rewrite the repr tests
  it still does not check the content of the repr, though
* rewrite some more tests
* simplify the numpy-method-with-args tests
* always use the same data units unless the compatibility is tested
* partially rewrite more tests
* rewrite combine_first
  This also adds tests for units in indexes, which are by default stripped.
* simplify the comparisons test a bit
* skip the tests for identical
* remove the map_values function
* only call convert_units if necessary
* use assert_units_equal and assert_equal in broadcast_like and skip it
* remove the conditional skip since pint now supports __array_function__
* only skip the broadcast_like tests if we attempt to put units in indexes
* remove the xfail mark from the where tests
* reimplement the broadcast_equals tests
* reimplement the tests on stacked arrays
* refactor the to_stacked_array tests
  this test is marked as skipped because the unit registry always returns
  numpy.array objects which are not hashable, so the initial dataset with
  units cannot be constructed (the result of to_stacked_array wouldn't be
  correct either because IndexVariable doesn't support units)
* fix the stacking and reordering tests
* don't create a coordinate for the isel tests
* separate the tests for units in dims from the tests for units in data
* refactor the dataset constructor tests
* fix the repr tests
* raise on all warnings
* rename merge_mappings to zip_mappings
* rename merge_dicts to merge_mappings
* make the missing value filling tests raise on warnings
* remove a leftover assert_equal_with_units
* refactor the sel tests
* make the loc tests a slightly modified copy of the sel tests
* make the drop_sel tests a slightly modified version of the sel tests
* refactor the head / tail / thin tests
* refactor the squeeze tests to not have multiple tests per case
* skip the head / tail / thin tests with units in dimensions
* combine the interp and reindex tests
* combine the interp_like and reindex_like tests
* refactor the computation tests
* rewrite the computation objects tests
* rewrite the resample tests
* rewrite the grouped operations tests
* rewrite the content manipulation tests
* refactor the merge tests
* remove the old assert_equal_with_units function
* xfail the groupby_bins tests for now
* fix and use allclose
* filterwarnings for the whole TestDataset class
* modify the squeeze tests to not use units in indexes
* replace skip with xfail
* update whats-new.rst
* update the xfail reason for the rolling_exp tests
* temporarily use pip to install pint since the feedstock seems to take a while
* don't use pip to install pint
* update the xfail to require at least 0.12.1
* xfail the prod tests
* filter only UnitStrippedWarning
* remove unncessary commas
---
 ci/requirements/py36-min-nep18.yml |    2 +-
 doc/whats-new.rst                  |    2 +-
 xarray/tests/test_units.py         | 1463 +++++++++++++---------------
 3 files changed, 699 insertions(+), 768 deletions(-)

diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
index a2245e89b41..48b9c057260 100644
--- a/ci/requirements/py36-min-nep18.yml
+++ b/ci/requirements/py36-min-nep18.yml
@@ -11,7 +11,7 @@ dependencies:
   - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
   - numpy=1.17
   - pandas=0.25
-  - pint=0.11
+  - pint
   - pip
   - pytest
   - pytest-cov
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index bcff60ce4df..4b5bb1e491f 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -72,7 +72,7 @@ New Features
 - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
   :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`. (:pull:`3922`)
   By `Kai Mühlbauer `_.
-- More support for unit aware arrays with pint (:pull:`3643`)
+- More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`)
   By `Justus Magin `_.
- Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even without ``append_dim``, as long as dimension sizes do not change. diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 6f4f9f768d9..b477e8cccb2 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -7,9 +7,8 @@ import pytest import xarray as xr -from xarray.core import formatting from xarray.core.npcompat import IS_NEP18_ACTIVE -from xarray.testing import assert_allclose, assert_identical +from xarray.testing import assert_allclose, assert_equal, assert_identical from .test_variable import _PAD_XR_NP_ARGS, VariableSubclassobjects @@ -27,11 +26,6 @@ pytest.mark.skipif( not IS_NEP18_ACTIVE, reason="NUMPY_EXPERIMENTAL_ARRAY_FUNCTION is not enabled" ), - # TODO: remove this once pint has a released version with __array_function__ - pytest.mark.skipif( - not hasattr(unit_registry.Quantity, "__array_function__"), - reason="pint does not implement __array_function__ yet", - ), # pytest.mark.filterwarnings("ignore:::pint[.*]"), ] @@ -51,10 +45,23 @@ def dimensionality(obj): def compatible_mappings(first, second): return { key: is_compatible(unit1, unit2) - for key, (unit1, unit2) in merge_mappings(first, second) + for key, (unit1, unit2) in zip_mappings(first, second) } +def merge_mappings(base, *mappings): + result = base.copy() + for m in mappings: + result.update(m) + + return result + + +def zip_mappings(*mappings): + for key in set(mappings[0]).intersection(*mappings[1:]): + yield key, tuple(m[key] for m in mappings) + + def array_extract_units(obj): if isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)): obj = obj.data @@ -257,50 +264,11 @@ def assert_units_equal(a, b): assert extract_units(a) == extract_units(b) -def assert_equal_with_units(a, b): - # works like xr.testing.assert_equal, but also explicitly checks units - # so, it is more like assert_identical - __tracebackhide__ = True - - if isinstance(a, xr.Dataset) or isinstance(b, xr.Dataset): - a_units = extract_units(a) - b_units = extract_units(b) - - a_without_units = strip_units(a) - b_without_units = strip_units(b) - - assert a_without_units.equals(b_without_units), formatting.diff_dataset_repr( - a, b, "equals" - ) - assert a_units == b_units - else: - a = a if not isinstance(a, (xr.DataArray, xr.Variable)) else a.data - b = b if not isinstance(b, (xr.DataArray, xr.Variable)) else b.data - - assert type(a) == type(b) or ( - isinstance(a, Quantity) and isinstance(b, Quantity) - ) - - # workaround until pint implements allclose in __array_function__ - if isinstance(a, Quantity) or isinstance(b, Quantity): - assert ( - hasattr(a, "magnitude") and hasattr(b, "magnitude") - ) and np.allclose(a.magnitude, b.magnitude, equal_nan=True) - assert (hasattr(a, "units") and hasattr(b, "units")) and a.units == b.units - else: - assert np.allclose(a, b, equal_nan=True) - - @pytest.fixture(params=[float, int]) def dtype(request): return request.param -def merge_mappings(*mappings): - for key in set(mappings[0]).intersection(*mappings[1:]): - yield key, tuple(m[key] for m in mappings) - - def merge_args(default_args, new_args): from itertools import zip_longest @@ -427,7 +395,7 @@ def test_apply_ufunc_dataset(dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -518,7 +486,7 @@ def 
test_align_dataarray(fill_value, variant, unit, error, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -939,7 +907,7 @@ def test_concat_dataset(variant, unit, error, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -1050,7 +1018,7 @@ def test_merge_dataarray(variant, unit, error, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -1430,7 +1398,7 @@ def example_1d_objects(self): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) def test_real_and_imag(self): super().test_real_and_imag() @@ -1474,7 +1442,7 @@ def test_aggregation(self, func, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) def test_aggregate_complex(self): variable = xr.Variable("x", [1, 2j, np.nan] * unit_registry.m) @@ -1486,7 +1454,7 @@ def test_aggregate_complex(self): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "func", @@ -1788,7 +1756,7 @@ def test_isel(self, indices, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "unit,error", @@ -1928,7 +1896,7 @@ def test_squeeze(self, dtype): pytest.param( method("quantile", q=[0.25, 0.75]), marks=pytest.mark.xfail( - LooseVersion(pint.__version__) < "0.12", + LooseVersion(pint.__version__) <= "0.12", reason="quantile / nanquantile not implemented yet", ), ), @@ -2268,7 +2236,7 @@ def test_repr(self, func, variant, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose", + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose", ) @pytest.mark.parametrize( "func", @@ -3331,7 +3299,7 @@ def test_head_tail_thin(self, func, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( @@ -3408,7 +3376,7 @@ def test_interp_reindex_indexing(self, func, unit, error, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", 
reason="pint bug in isclose" ) @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( @@ -3577,7 +3545,7 @@ def test_stacking_reordering(self, func, dtype): pytest.param( method("quantile", q=[0.25, 0.75]), marks=pytest.mark.xfail( - LooseVersion(pint.__version__) < "0.12", + LooseVersion(pint.__version__) <= "0.12", reason="quantile / nanquantile not implemented yet", ), ), @@ -3614,7 +3582,7 @@ def test_computation(self, func, dtype): # TODO: remove once pint==0.12 has been released @pytest.mark.xfail( - LooseVersion(pint.__version__) <= "0.11", reason="pint bug in isclose" + LooseVersion(pint.__version__) <= "0.12", reason="pint bug in isclose" ) @pytest.mark.parametrize( "func", @@ -3630,7 +3598,9 @@ def test_computation(self, func, dtype): ), pytest.param( method("rolling_exp", y=3), - marks=pytest.mark.xfail(reason="units not supported by numbagg"), + marks=pytest.mark.xfail( + reason="numbagg functions are not supported by pint" + ), ), ), ids=repr, @@ -3676,7 +3646,7 @@ def test_resample(self, dtype): pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), marks=pytest.mark.xfail( - LooseVersion(pint.__version__) < "0.12", + LooseVersion(pint.__version__) <= "0.12", reason="quantile / nanquantile not implemented yet", ), ), @@ -3711,15 +3681,16 @@ def test_grouped_operations(self, func, dtype): xr.testing.assert_identical(expected, actual) +@pytest.mark.filterwarnings("error::pint.UnitStrippedWarning") class TestDataset: @pytest.mark.parametrize( "unit,error", ( - pytest.param(1, DimensionalityError, id="no_unit"), + pytest.param(1, xr.MergeError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + unit_registry.dimensionless, xr.MergeError, id="dimensionless" ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), + pytest.param(unit_registry.s, xr.MergeError, id="incompatible_unit"), pytest.param(unit_registry.mm, None, id="compatible_unit"), pytest.param(unit_registry.m, None, id="same_unit"), ), @@ -3728,11 +3699,10 @@ class TestDataset: "shared", ( "nothing", - pytest.param("dims", marks=pytest.mark.xfail(reason="indexes strip units")), pytest.param( - "coords", - marks=pytest.mark.xfail(reason="reindex does not work with pint yet"), + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") ), + "coords", ), ) def test_init(self, shared, unit, error, dtype): @@ -3740,60 +3710,53 @@ def test_init(self, shared, unit, error, dtype): scaled_unit = unit_registry.mm a = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa - b = np.linspace(-1, 0, 12).astype(dtype) * unit_registry.Pa - - raw_x = np.arange(a.shape[0]) - x = raw_x * original_unit - x2 = x.to(scaled_unit) - - raw_y = np.arange(b.shape[0]) - y = raw_y * unit - y_units = unit if isinstance(y, unit_registry.Quantity) else None - if isinstance(y, unit_registry.Quantity): - if y.check(scaled_unit): - y2 = y.to(scaled_unit) - else: - y2 = y * 1000 - y2_units = y2.units - else: - y2 = y * 1000 - y2_units = None + b = np.linspace(-1, 0, 10).astype(dtype) * unit_registry.degK + + values_a = np.arange(a.shape[0]) + dim_a = values_a * original_unit + coord_a = dim_a.to(scaled_unit) + + values_b = np.arange(b.shape[0]) + dim_b = values_b * unit + coord_b = ( + dim_b.to(scaled_unit) + if unit_registry.is_compatible_with(dim_b, scaled_unit) + and unit != scaled_unit + else dim_b * 1000 + ) variants = { - "nothing": ({"x": x, "x2": ("x", x2)}, {"y": y, "y2": ("y", y2)}), - "dims": ( - {"x": x, "x2": ("x", 
strip_units(x2))}, - {"x": y, "y2": ("x", strip_units(y2))}, + "nothing": ({}, {}), + "dims": ({"x": dim_a}, {"x": dim_b}), + "coords": ( + {"x": values_a, "y": ("x", coord_a)}, + {"x": values_b, "y": ("x", coord_b)}, ), - "coords": ({"x": raw_x, "y": ("x", x2)}, {"x": raw_y, "y": ("x", y2)}), } coords_a, coords_b = variants.get(shared) dims_a, dims_b = ("x", "y") if shared == "nothing" else ("x", "x") - arr1 = xr.DataArray(data=a, coords=coords_a, dims=dims_a) - arr2 = xr.DataArray(data=b, coords=coords_b, dims=dims_b) + a = xr.DataArray(data=a, coords=coords_a, dims=dims_a) + b = xr.DataArray(data=b, coords=coords_b, dims=dims_b) + if error is not None and shared != "nothing": with pytest.raises(error): - xr.Dataset(data_vars={"a": arr1, "b": arr2}) + xr.Dataset(data_vars={"a": a, "b": b}) return - actual = xr.Dataset(data_vars={"a": arr1, "b": arr2}) + actual = xr.Dataset(data_vars={"a": a, "b": b}) - expected_units = { - "a": a.units, - "b": b.units, - "x": x.units, - "x2": x2.units, - "y": y_units, - "y2": y2_units, - } + units = merge_mappings( + extract_units(a.rename("a")), extract_units(b.rename("b")) + ) expected = attach_units( - xr.Dataset(data_vars={"a": strip_units(arr1), "b": strip_units(arr2)}), - expected_units, + xr.Dataset(data_vars={"a": strip_units(a), "b": strip_units(b)}), units ) - assert_equal_with_units(actual, expected) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", (pytest.param(str, id="str"), pytest.param(repr, id="repr")) @@ -3801,48 +3764,45 @@ def test_init(self, shared, unit, error, dtype): @pytest.mark.parametrize( "variant", ( + "data", pytest.param( - "with_dims", + "dims", marks=pytest.mark.xfail(reason="units in indexes are not supported"), ), - pytest.param("with_coords"), - pytest.param("without_coords"), + "coords", ), ) - @pytest.mark.filterwarnings("error:::pint[.*]") def test_repr(self, func, variant, dtype): - array1 = np.linspace(1, 2, 10, dtype=dtype) * unit_registry.Pa - array2 = np.linspace(0, 1, 10, dtype=dtype) * unit_registry.degK + unit1, unit2 = ( + (unit_registry.Pa, unit_registry.degK) if variant == "data" else (1, 1) + ) + + array1 = np.linspace(1, 2, 10, dtype=dtype) * unit1 + array2 = np.linspace(0, 1, 10, dtype=dtype) * unit2 x = np.arange(len(array1)) * unit_registry.s y = x.to(unit_registry.ms) variants = { - "with_dims": {"x": x}, - "with_coords": {"y": ("x", y)}, - "without_coords": {}, + "dims": {"x": x}, + "coords": {"y": ("x", y)}, + "data": {}, } - data_array = xr.Dataset( + ds = xr.Dataset( data_vars={"a": ("x", array1), "b": ("x", array2)}, coords=variants.get(variant), ) # FIXME: this just checks that the repr does not raise # warnings or errors, but does not check the result - func(data_array) + func(ds) @pytest.mark.parametrize( "func", ( - pytest.param( - function("all"), - marks=pytest.mark.xfail(reason="not implemented by pint"), - ), - pytest.param( - function("any"), - marks=pytest.mark.xfail(reason="not implemented by pint"), - ), + function("all"), + function("any"), function("argmax"), function("argmin"), function("max"), @@ -3850,28 +3810,19 @@ def test_repr(self, func, variant, dtype): function("mean"), pytest.param( function("median"), - marks=pytest.mark.xfail( - reason="np.median does not work with dataset yet" - ), + marks=pytest.mark.xfail(reason="median does not work with dataset yet"), ), function("sum"), pytest.param( function("prod"), - marks=pytest.mark.xfail(reason="not implemented by pint"), + marks=pytest.mark.xfail(reason="prod does 
not work with dataset yet"), ), function("std"), function("var"), function("cumsum"), - pytest.param( - function("cumprod"), - marks=pytest.mark.xfail(reason="fails within xarray"), - ), - pytest.param( - method("all"), marks=pytest.mark.xfail(reason="not implemented by pint") - ), - pytest.param( - method("any"), marks=pytest.mark.xfail(reason="not implemented by pint") - ), + function("cumprod"), + method("all"), + method("any"), method("argmax"), method("argmin"), method("max"), @@ -3881,68 +3832,49 @@ def test_repr(self, func, variant, dtype): method("sum"), pytest.param( method("prod"), - marks=pytest.mark.xfail(reason="not implemented by pint"), + marks=pytest.mark.xfail(reason="prod does not work with dataset yet"), ), method("std"), method("var"), method("cumsum"), - pytest.param( - method("cumprod"), marks=pytest.mark.xfail(reason="fails within xarray") - ), + method("cumprod"), ), ids=repr, ) def test_aggregation(self, func, dtype): - unit_a = ( - unit_registry.Pa if func.name != "cumprod" else unit_registry.dimensionless - ) - unit_b = ( - unit_registry.kg / unit_registry.m ** 3 + unit_a, unit_b = ( + (unit_registry.Pa, unit_registry.degK) if func.name != "cumprod" - else unit_registry.dimensionless - ) - a = xr.DataArray(data=np.linspace(0, 1, 10).astype(dtype) * unit_a, dims="x") - b = xr.DataArray(data=np.linspace(-1, 0, 10).astype(dtype) * unit_b, dims="x") - x = xr.DataArray(data=np.arange(10).astype(dtype) * unit_registry.m, dims="x") - y = xr.DataArray( - data=np.arange(10, 20).astype(dtype) * unit_registry.s, dims="x" + else (unit_registry.dimensionless, unit_registry.dimensionless) ) - ds = xr.Dataset(data_vars={"a": a, "b": b}, coords={"x": x, "y": y}) + a = np.linspace(0, 1, 10).astype(dtype) * unit_a + b = np.linspace(-1, 0, 10).astype(dtype) * unit_b + + ds = xr.Dataset({"a": ("x", a), "b": ("x", b)}) + + units_a = array_extract_units(func(a)) + units_b = array_extract_units(func(b)) + units = {"a": units_a, "b": units_b} actual = func(ds) - expected = attach_units( - func(strip_units(ds)), - { - "a": extract_units(func(a)).get(None), - "b": extract_units(func(b)).get(None), - }, - ) + expected = attach_units(func(strip_units(ds)), units) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize("property", ("imag", "real")) def test_numpy_properties(self, property, dtype): - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray( - data=np.linspace(0, 1, 10) * unit_registry.Pa, dims="x" - ), - "b": xr.DataArray( - data=np.linspace(-1, 0, 15) * unit_registry.Pa, dims="y" - ), - }, - coords={ - "x": np.arange(10) * unit_registry.m, - "y": np.arange(15) * unit_registry.s, - }, - ) + a = np.linspace(0, 1, 10) * unit_registry.Pa + b = np.linspace(-1, 0, 15) * unit_registry.degK + ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) units = extract_units(ds) actual = getattr(ds, property) expected = attach_units(getattr(strip_units(ds), property), units) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", @@ -3956,31 +3888,19 @@ def test_numpy_properties(self, property, dtype): ids=repr, ) def test_numpy_methods(self, func, dtype): - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray( - data=np.linspace(1, -1, 10) * unit_registry.Pa, dims="x" - ), - "b": xr.DataArray( - data=np.linspace(-1, 1, 15) * unit_registry.Pa, dims="y" - ), - }, - coords={ - "x": np.arange(10) * unit_registry.m, - "y": 
np.arange(15) * unit_registry.s, - }, - ) - units = { - "a": array_extract_units(func(ds.a)), - "b": array_extract_units(func(ds.b)), - "x": unit_registry.m, - "y": unit_registry.s, - } + a = np.linspace(1, -1, 10) * unit_registry.Pa + b = np.linspace(-1, 1, 15) * unit_registry.degK + ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) + + units_a = array_extract_units(func(a)) + units_b = array_extract_units(func(b)) + units = {"a": units_a, "b": units_b} actual = func(ds) expected = attach_units(func(strip_units(ds)), units) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize("func", (method("clip", min=3, max=8),), ids=repr) @pytest.mark.parametrize( @@ -3997,21 +3917,13 @@ def test_numpy_methods(self, func, dtype): ) def test_numpy_methods_with_args(self, func, unit, error, dtype): data_unit = unit_registry.m - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=np.arange(10) * data_unit, dims="x"), - "b": xr.DataArray(data=np.arange(15) * data_unit, dims="y"), - }, - coords={ - "x": np.arange(10) * unit_registry.m, - "y": np.arange(15) * unit_registry.s, - }, - ) + a = np.linspace(0, 10, 15) * unit_registry.m + b = np.linspace(-2, 12, 20) * unit_registry.m + ds = xr.Dataset({"a": ("x", a), "b": ("y", b)}) units = extract_units(ds) kwargs = { - key: (value * unit if isinstance(value, (int, float)) else value) - for key, value in func.kwargs.items() + key: array_attach_units(value, unit) for key, value in func.kwargs.items() } if error is not None: @@ -4028,7 +3940,8 @@ def test_numpy_methods_with_args(self, func, unit, error, dtype): actual = func(ds, **kwargs) expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", (method("isnull"), method("notnull"), method("count")), ids=repr @@ -4058,22 +3971,13 @@ def test_missing_value_detection(self, func, dtype): * unit_registry.Pa ) - x = np.arange(array1.shape[0]) * unit_registry.m - y = np.arange(array1.shape[1]) * unit_registry.m - z = np.arange(array2.shape[0]) * unit_registry.m - - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("z", "x")), - }, - coords={"x": x, "y": y, "z": z}, - ) + ds = xr.Dataset({"a": (("x", "y"), array1), "b": (("z", "x"), array2)}) expected = func(strip_units(ds)) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="ffill and bfill lose the unit") @pytest.mark.parametrize("func", (method("ffill"), method("bfill")), ids=repr) @@ -4087,23 +3991,14 @@ def test_missing_value_filling(self, func, dtype): * unit_registry.Pa ) - x = np.arange(len(array1)) - - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("y", array2)}) + units = extract_units(ds) - expected = attach_units( - func(strip_units(ds), dim="x"), - {"a": unit_registry.degK, "b": unit_registry.Pa}, - ) + expected = attach_units(func(strip_units(ds), dim="x"), units) actual = func(ds, dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -4113,14 +4008,7 @@ def 
test_missing_value_filling(self, func, dtype): unit_registry.dimensionless, DimensionalityError, id="dimensionless" ), pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param( - unit_registry.cm, - None, - id="compatible_unit", - marks=pytest.mark.xfail( - reason="where converts the array, not the fill value" - ), - ), + pytest.param(unit_registry.cm, None, id="compatible_unit",), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -4141,30 +4029,26 @@ def test_fillna(self, fill_value, unit, error, dtype): np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * unit_registry.m ) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - } - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) + value = fill_value * unit + units = extract_units(ds) if error is not None: with pytest.raises(error): - ds.fillna(value=fill_value * unit) + ds.fillna(value=value) return - actual = ds.fillna(value=fill_value * unit) + actual = ds.fillna(value=value) expected = attach_units( strip_units(ds).fillna( - value=strip_units( - convert_units(fill_value * unit, {None: unit_registry.m}) - ) + value=strip_units(convert_units(value, {None: unit_registry.m})) ), - {"a": unit_registry.m, "b": unit_registry.m}, + units, ) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) def test_dropna(self, dtype): array1 = ( @@ -4175,22 +4059,14 @@ def test_dropna(self, dtype): np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * unit_registry.Pa ) - x = np.arange(len(array1)) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) + units = extract_units(ds) - expected = attach_units( - strip_units(ds).dropna(dim="x"), - {"a": unit_registry.degK, "b": unit_registry.Pa}, - ) + expected = attach_units(strip_units(ds).dropna(dim="x"), units) actual = ds.dropna(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit", @@ -4211,34 +4087,28 @@ def test_isin(self, unit, dtype): np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * unit_registry.m ) - x = np.arange(len(array1)) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) raw_values = np.array([1.4, np.nan, 2.3]).astype(dtype) values = raw_values * unit - if ( - isinstance(values, unit_registry.Quantity) - and values.check(unit_registry.m) - and unit != unit_registry.m - ): - raw_values = values.to(unit_registry.m).magnitude + converted_values = ( + convert_units(values, {None: unit_registry.m}) + if is_compatible(unit, unit_registry.m) + else values + ) - expected = strip_units(ds).isin(raw_values) - if not isinstance(values, unit_registry.Quantity) or not values.check( - unit_registry.m - ): + expected = strip_units(ds).isin(strip_units(converted_values)) + # TODO: use `unit_registry.is_compatible_with(unit, unit_registry.m)` instead. + # Needs `pint>=0.12.1`, though, so we probably should wait until that is released. 
+ if not is_compatible(unit, unit_registry.m): expected.a[:] = False expected.b[:] = False + actual = ds.isin(values) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "variant", ("masking", "replacing_scalar", "replacing_array", "dropping") @@ -4260,13 +4130,8 @@ def test_where(self, variant, unit, error, dtype): array1 = np.linspace(0, 1, 10).astype(dtype) * original_unit array2 = np.linspace(-1, 0, 10).astype(dtype) * original_unit - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": np.arange(len(array1))}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) + units = extract_units(ds) condition = ds < 0.5 * original_unit other = np.linspace(-2, -1, 10).astype(dtype) * unit @@ -4288,15 +4153,13 @@ def test_where(self, variant, unit, error, dtype): for key, value in kwargs.items() } - expected = attach_units( - strip_units(ds).where(**kwargs_without_units), - {"a": original_unit, "b": original_unit}, - ) + expected = attach_units(strip_units(ds).where(**kwargs_without_units), units,) actual = ds.where(**kwargs) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="interpolate strips units") + @pytest.mark.xfail(reason="interpolate_na uses numpy.vectorize") def test_interpolate_na(self, dtype): array1 = ( np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) @@ -4306,24 +4169,15 @@ def test_interpolate_na(self, dtype): np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * unit_registry.Pa ) - x = np.arange(len(array1)) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}) + units = extract_units(ds) - expected = attach_units( - strip_units(ds).interpolate_na(dim="x"), - {"a": unit_registry.degK, "b": unit_registry.Pa}, - ) + expected = attach_units(strip_units(ds).interpolate_na(dim="x"), units,) actual = ds.interpolate_na(dim="x") - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="wrong argument order for `where`") @pytest.mark.parametrize( "unit,error", ( @@ -4336,31 +4190,40 @@ def test_interpolate_na(self, dtype): pytest.param(unit_registry.m, None, id="same_unit"), ), ) - def test_combine_first(self, unit, error, dtype): + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + ), + ) + def test_combine_first(self, variant, unit, error, dtype): + variants = { + "data": (unit_registry.m, unit, 1, 1), + "dims": (1, 1, unit_registry.m, unit), + } + data_unit, other_data_unit, dims_unit, other_dims_unit = variants.get(variant) + array1 = ( - np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) - * unit_registry.m + np.array([1.4, np.nan, 2.3, np.nan, np.nan, 9.1]).astype(dtype) * data_unit ) array2 = ( - np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) - * unit_registry.m + np.array([4.3, 9.8, 7.5, np.nan, 8.2, np.nan]).astype(dtype) * data_unit ) - x = np.arange(len(array1)) + x = np.arange(len(array1)) * dims_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), 
- }, - coords={"x": x}, + data_vars={"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}, ) - other_array1 = np.ones_like(array1) * unit - other_array2 = -1 * np.ones_like(array2) * unit + units = extract_units(ds) + + other_array1 = np.ones_like(array1) * other_data_unit + other_array2 = np.full_like(array2, fill_value=-1) * other_data_unit + other_x = (np.arange(array1.shape[0]) + 5) * other_dims_unit other = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=other_array1, dims="x"), - "b": xr.DataArray(data=other_array2, dims="x"), - }, - coords={"x": np.arange(array1.shape[0])}, + data_vars={"a": ("x", other_array1), "b": ("x", other_array2)}, + coords={"x": other_x}, ) if error is not None: @@ -4370,16 +4233,13 @@ def test_combine_first(self, unit, error, dtype): return expected = attach_units( - strip_units(ds).combine_first( - strip_units( - convert_units(other, {"a": unit_registry.m, "b": unit_registry.m}) - ) - ), - {"a": unit_registry.m, "b": unit_registry.m}, + strip_units(ds).combine_first(strip_units(convert_units(other, units))), + units, ) actual = ds.combine_first(other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit", @@ -4392,7 +4252,7 @@ def test_combine_first(self, unit, error, dtype): ), ) @pytest.mark.parametrize( - "variation", + "variant", ( "data", pytest.param( @@ -4401,50 +4261,67 @@ def test_combine_first(self, unit, error, dtype): "coords", ), ) - @pytest.mark.parametrize("func", (method("equals"), method("identical")), ids=repr) - def test_comparisons(self, func, variation, unit, dtype): - def is_compatible(a, b): - a = a if a is not None else 1 - b = b if b is not None else 1 - quantity = np.arange(5) * a - - return a == b or quantity.check(b) - + @pytest.mark.parametrize( + "func", + ( + method("equals"), + pytest.param( + method("identical"), + marks=pytest.mark.skip("behaviour of identical is unclear"), + ), + ), + ids=repr, + ) + def test_comparisons(self, func, variant, unit, dtype): array1 = np.linspace(0, 5, 10).astype(dtype) array2 = np.linspace(-5, 0, 10).astype(dtype) coord = np.arange(len(array1)).astype(dtype) - original_unit = unit_registry.m - quantity1 = array1 * original_unit - quantity2 = array2 * original_unit - x = coord * original_unit - y = coord * original_unit + variants = { + "data": (unit_registry.m, 1, 1), + "dims": (1, unit_registry.m, 1), + "coords": (1, 1, unit_registry.m), + } + data_unit, dim_unit, coord_unit = variants.get(variant) - units = {"data": (unit, 1, 1), "dims": (1, unit, 1), "coords": (1, 1, unit)} - data_unit, dim_unit, coord_unit = units.get(variation) + a = array1 * data_unit + b = array2 * data_unit + x = coord * dim_unit + y = coord * coord_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=quantity1, dims="x"), - "b": xr.DataArray(data=quantity2, dims="x"), - }, - coords={"x": x, "y": ("x", y)}, + data_vars={"a": ("x", a), "b": ("x", b)}, coords={"x": x, "y": ("x", y)}, ) + units = extract_units(ds) + + other_variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), + } + other_data_unit, other_dim_unit, other_coord_unit = other_variants.get(variant) other_units = { - "a": data_unit if quantity1.check(data_unit) else None, - "b": data_unit if quantity2.check(data_unit) else None, - "x": dim_unit if x.check(dim_unit) else None, - "y": coord_unit if y.check(coord_unit) else None, + "a": other_data_unit, + "b": other_data_unit, + "x": other_dim_unit, + "y": 
other_coord_unit, } - other = attach_units(strip_units(convert_units(ds, other_units)), other_units) - units = extract_units(ds) + to_convert = { + key: unit if is_compatible(unit, reference) else None + for key, (unit, reference) in zip_mappings(units, other_units) + } + # convert units where possible, then attach all units to the converted dataset + other = attach_units(strip_units(convert_units(ds, to_convert)), other_units) other_units = extract_units(other) + # make sure all units are compatible and only then try to + # convert and compare values equal_ds = all( - is_compatible(units[name], other_units[name]) for name in units.keys() + is_compatible(unit, other_unit) + for _, (unit, other_unit) in zip_mappings(units, other_units) ) and (strip_units(ds).equals(strip_units(convert_units(other, units)))) equal_units = units == other_units expected = equal_ds and (func.name != "identical" or equal_units) @@ -4453,6 +4330,9 @@ def is_compatible(a, b): assert expected == actual + # TODO: eventually use another decorator / wrapper function that + # applies a filter to the parametrize combinations: + # we only need a single test for data @pytest.mark.parametrize( "unit", ( @@ -4463,14 +4343,29 @@ def is_compatible(a, b): pytest.param(unit_registry.m, id="identical_unit"), ), ) - def test_broadcast_like(self, unit, dtype): - array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * unit_registry.Pa - array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * unit_registry.Pa + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + ), + ) + def test_broadcast_like(self, variant, unit, dtype): + variants = { + "data": ((unit_registry.m, unit), (1, 1)), + "dims": ((1, 1), (unit_registry.m, unit)), + } + (data_unit1, data_unit2), (dim_unit1, dim_unit2) = variants.get(variant) - x1 = np.arange(2) * unit_registry.m - x2 = np.arange(2) * unit - y1 = np.array([0]) * unit_registry.m - y2 = np.arange(3) * unit + array1 = np.linspace(1, 2, 2 * 1).reshape(2, 1).astype(dtype) * data_unit1 + array2 = np.linspace(0, 1, 2 * 3).reshape(2, 3).astype(dtype) * data_unit2 + + x1 = np.arange(2) * dim_unit1 + x2 = np.arange(2) * dim_unit2 + y1 = np.array([0]) * dim_unit1 + y2 = np.arange(3) * dim_unit2 ds1 = xr.Dataset( data_vars={"a": (("x", "y"), array1)}, coords={"x": x1, "y": y1} @@ -4484,7 +4379,8 @@ def test_broadcast_like(self, unit, dtype): ) actual = ds1.broadcast_like(ds2) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit", @@ -4497,32 +4393,25 @@ def test_broadcast_like(self, unit, dtype): ), ) def test_broadcast_equals(self, unit, dtype): + # TODO: does this use indexes? 
left_array1 = np.ones(shape=(2, 3), dtype=dtype) * unit_registry.m left_array2 = np.zeros(shape=(3, 6), dtype=dtype) * unit_registry.m right_array1 = np.ones(shape=(2,)) * unit - right_array2 = np.ones(shape=(3,)) * unit + right_array2 = np.zeros(shape=(3,)) * unit left = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=left_array1, dims=("x", "y")), - "b": xr.DataArray(data=left_array2, dims=("y", "z")), - } - ) - right = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=right_array1, dims="x"), - "b": xr.DataArray(data=right_array2, dims="y"), - } + {"a": (("x", "y"), left_array1), "b": (("y", "z"), left_array2)}, ) + right = xr.Dataset({"a": ("x", right_array1), "b": ("y", right_array2)}) - units = { - **extract_units(left), - **({} if left_array1.check(unit) else {"a": None, "b": None}), - } - expected = strip_units(left).broadcast_equals( - strip_units(convert_units(right, units)) - ) & left_array1.check(unit) + units = merge_mappings( + extract_units(left), + {} if is_compatible(left_array1, unit) else {"a": None, "b": None}, + ) + expected = is_compatible(left_array1, unit) and strip_units( + left + ).broadcast_equals(strip_units(convert_units(right, units))) actual = left.broadcast_equals(right) assert expected == actual @@ -4532,68 +4421,74 @@ def test_broadcast_equals(self, unit, dtype): (method("unstack"), method("reset_index", "v"), method("reorder_levels")), ids=repr, ) - def test_stacking_stacked(self, func, dtype): - array1 = ( - np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * unit_registry.m - ) + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units"), + ), + ), + ) + def test_stacking_stacked(self, variant, func, dtype): + variants = { + "data": (unit_registry.m, 1), + "dims": (1, unit_registry.m), + } + data_unit, dim_unit = variants.get(variant) + + array1 = np.linspace(0, 10, 5 * 10).reshape(5, 10).astype(dtype) * data_unit array2 = ( np.linspace(-10, 0, 5 * 10 * 15).reshape(5, 10, 15).astype(dtype) - * unit_registry.m + * data_unit ) - x = np.arange(array1.shape[0]) - y = np.arange(array1.shape[1]) - z = np.arange(array2.shape[2]) + x = np.arange(array1.shape[0]) * dim_unit + y = np.arange(array1.shape[1]) * dim_unit + z = np.arange(array2.shape[2]) * dim_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z")), - }, + data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)}, coords={"x": x, "y": y, "z": z}, ) + units = extract_units(ds) stacked = ds.stack(v=("x", "y")) - expected = attach_units( - func(strip_units(stacked)), {"a": unit_registry.m, "b": unit_registry.m} - ) + expected = attach_units(func(strip_units(stacked)), units) actual = func(stacked) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="does not work with quantities yet") + @pytest.mark.xfail( + reason="stacked dimension's labels have to be hashable, but is a numpy.array" + ) def test_to_stacked_array(self, dtype): - labels = np.arange(5).astype(dtype) * unit_registry.s - arrays = {name: np.linspace(0, 1, 10) * unit_registry.m for name in labels} + labels = range(5) * unit_registry.s + arrays = { + name: np.linspace(0, 1, 10).astype(dtype) * unit_registry.m + for name in labels + } - ds = xr.Dataset( - data_vars={ - name: xr.DataArray(data=array, dims="x") - for name, array in arrays.items() - } - ) + ds = 
xr.Dataset({name: ("x", array) for name, array in arrays.items()}) + units = {None: unit_registry.m, "y": unit_registry.s} func = method("to_stacked_array", "z", variable_dim="y", sample_dims=["x"]) actual = func(ds).rename(None) - expected = attach_units( - func(strip_units(ds)).rename(None), - {None: unit_registry.m, "y": unit_registry.s}, - ) + expected = attach_units(func(strip_units(ds)).rename(None), units,) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("transpose", "y", "x", "z1", "z2"), - method("stack", a=("x", "y")), + method("stack", u=("x", "y")), method("set_index", x="x2"), - pytest.param( - method("shift", x=2), - marks=pytest.mark.xfail(reason="tries to concatenate nan arrays"), - ), + method("shift", x=2), method("roll", x=2, roll_coords=False), method("sortby", "x2"), ), @@ -4618,20 +4513,19 @@ def test_stacking_reordering(self, func, dtype): ds = xr.Dataset( data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y", "z1")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z2")), + "a": (("x", "y", "z1"), array1), + "b": (("x", "y", "z2"), array2), }, coords={"x": x, "y": y, "z1": z1, "z2": z2, "x2": ("x", x2)}, ) + units = extract_units(ds) - expected = attach_units( - func(strip_units(ds)), {"a": unit_registry.Pa, "b": unit_registry.degK} - ) + expected = attach_units(func(strip_units(ds)), units) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="indexes strip units") @pytest.mark.parametrize( "indices", ( @@ -4643,22 +4537,14 @@ def test_isel(self, indices, dtype): array1 = np.arange(10).astype(dtype) * unit_registry.s array2 = np.linspace(0, 1, 10).astype(dtype) * unit_registry.Pa - x = np.arange(len(array1)) * unit_registry.m - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims="x"), - "b": xr.DataArray(data=array2, dims="x"), - }, - coords={"x": x}, - ) + ds = xr.Dataset(data_vars={"a": ("x", array1), "b": ("x", array2)}) + units = extract_units(ds) - expected = attach_units( - strip_units(ds).isel(x=indices), - {"a": unit_registry.s, "b": unit_registry.Pa, "x": unit_registry.m}, - ) + expected = attach_units(strip_units(ds).isel(x=indices), units) actual = ds.isel(x=indices) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -4675,7 +4561,7 @@ def test_isel(self, indices, dtype): pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), - pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -4694,20 +4580,24 @@ def test_sel(self, raw_values, unit, error, dtype): values = raw_values * unit - if error is not None and not ( - isinstance(raw_values, (int, float)) and x.check(unit) - ): + # TODO: if we choose dm as compatible unit, single value keys + # can be found. Should we check that? 
+ if error is not None: with pytest.raises(error): ds.sel(x=values) return expected = attach_units( - strip_units(ds).sel(x=strip_units(convert_units(values, {None: x.units}))), - {"a": array1.units, "b": array2.units, "x": x.units}, + strip_units(ds).sel( + x=strip_units(convert_units(values, {None: unit_registry.m})) + ), + extract_units(ds), ) actual = ds.sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -4724,7 +4614,7 @@ def test_sel(self, raw_values, unit, error, dtype): pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), - pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -4743,9 +4633,9 @@ def test_drop_sel(self, raw_values, unit, error, dtype): values = raw_values * unit - if error is not None and not ( - isinstance(raw_values, (int, float)) and x.check(unit) - ): + # TODO: if we choose dm as compatible unit, single value keys + # can be found. Should we check that? + if error is not None: with pytest.raises(error): ds.drop_sel(x=values) @@ -4753,12 +4643,14 @@ def test_drop_sel(self, raw_values, unit, error, dtype): expected = attach_units( strip_units(ds).drop_sel( - x=strip_units(convert_units(values, {None: x.units})) + x=strip_units(convert_units(values, {None: unit_registry.m})) ), extract_units(ds), ) actual = ds.drop_sel(x=values) - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -4775,7 +4667,7 @@ def test_drop_sel(self, raw_values, unit, error, dtype): pytest.param(1, KeyError, id="no_units"), pytest.param(unit_registry.dimensionless, KeyError, id="dimensionless"), pytest.param(unit_registry.degree, KeyError, id="incompatible_unit"), - pytest.param(unit_registry.dm, KeyError, id="compatible_unit"), + pytest.param(unit_registry.mm, KeyError, id="compatible_unit"), pytest.param(unit_registry.m, None, id="identical_unit"), ), ) @@ -4794,9 +4686,9 @@ def test_loc(self, raw_values, unit, error, dtype): values = raw_values * unit - if error is not None and not ( - isinstance(raw_values, (int, float)) and x.check(unit) - ): + # TODO: if we choose dm as compatible unit, single value keys + # can be found. Should we check that? 
+ if error is not None: with pytest.raises(error): ds.loc[{"x": values}] @@ -4804,12 +4696,14 @@ def test_loc(self, raw_values, unit, error, dtype): expected = attach_units( strip_units(ds).loc[ - {"x": strip_units(convert_units(values, {None: x.units}))} + {"x": strip_units(convert_units(values, {None: unit_registry.m}))} ], - {"a": array1.units, "b": array2.units, "x": x.units}, + extract_units(ds), ) actual = ds.loc[{"x": values}] - assert_equal_with_units(expected, actual) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", @@ -4820,14 +4714,34 @@ def test_loc(self, raw_values, unit, error, dtype): ), ids=repr, ) - def test_head_tail_thin(self, func, dtype): - array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_head_tail_thin(self, func, variant, dtype): + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit_a, unit_b), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_a + array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_b coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.m, + "x": np.arange(10) * dim_unit, + "y": np.arange(5) * dim_unit, + "z": np.arange(8) * dim_unit, + "u": ("x", np.linspace(0, 1, 10) * coord_unit), + "v": ("y", np.linspace(1, 2, 5) * coord_unit), + "w": ("z", np.linspace(-1, 0, 8) * coord_unit), } ds = xr.Dataset( @@ -4841,8 +4755,10 @@ def test_head_tail_thin(self, func, dtype): expected = attach_units(func(strip_units(ds)), extract_units(ds)) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) + @pytest.mark.parametrize("dim", ("x", "y", "z", "t", "all")) @pytest.mark.parametrize( "shape", ( @@ -4853,13 +4769,9 @@ def test_head_tail_thin(self, func, dtype): pytest.param((1, 10, 1, 20), id="first and last dimension squeezable"), ), ) - def test_squeeze(self, shape, dtype): + def test_squeeze(self, shape, dim, dtype): names = "xyzt" - coords = { - name: np.arange(length).astype(dtype) - * (unit_registry.m if name != "t" else unit_registry.s) - for name, length in zip(names, shape) - } + dim_lengths = dict(zip(names, shape)) array1 = ( np.linspace(0, 1, 10 * 20).astype(dtype).reshape(shape) * unit_registry.degK ) @@ -4869,74 +4781,59 @@ def test_squeeze(self, shape, dtype): ds = xr.Dataset( data_vars={ - "a": xr.DataArray(data=array1, dims=tuple(names[: len(shape)])), - "b": xr.DataArray(data=array2, dims=tuple(names[: len(shape)])), + "a": (tuple(names[: len(shape)]), array1), + "b": (tuple(names[: len(shape)]), array2), }, - coords=coords, ) units = extract_units(ds) - expected = attach_units(strip_units(ds).squeeze(), units) + kwargs = {"dim": dim} if dim != "all" and dim_lengths.get(dim, 0) == 1 else {} - actual = ds.squeeze() - assert_equal_with_units(actual, expected) + expected = attach_units(strip_units(ds).squeeze(**kwargs), units) - # try squeezing the dimensions separately - names = tuple(dim for dim, coord in coords.items() if len(coord) == 1) - for name in names: - expected = 
attach_units(strip_units(ds).squeeze(dim=name), units) - actual = ds.squeeze(dim=name) - assert_equal_with_units(actual, expected) + actual = ds.squeeze(**kwargs) - @pytest.mark.xfail(reason="ignores units") + assert_units_equal(expected, actual) + assert_equal(expected, actual) + + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex"), ), + ids=repr, ) - def test_interp(self, unit, error): - array1 = np.linspace(1, 2, 10 * 5).reshape(10, 5) * unit_registry.degK - array2 = np.linspace(1, 2, 10 * 8).reshape(10, 8) * unit_registry.Pa - - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.s, + def test_interp_reindex(self, func, variant, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "z")), - }, - coords=coords, - ) - - new_coords = (np.arange(10) + 0.5) * unit + array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit + array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit - if error is not None: - with pytest.raises(error): - ds.interp(x=new_coords) + y = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(8) + 0.5 - units = extract_units(ds) - expected = attach_units( - strip_units(ds).interp(x=strip_units(convert_units(new_coords, units))), - units, + ds = xr.Dataset( + {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)} ) - actual = ds.interp(x=new_coords) + units = extract_units(ds) - assert_equal_with_units(actual, expected) + expected = attach_units(func(strip_units(ds), x=new_x), units) + actual = func(ds, x=new_x) - @pytest.mark.xfail(reason="ignores units") + assert_units_equal(expected, actual) + assert_equal(expected, actual) + + @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( "unit,error", ( @@ -4949,106 +4846,67 @@ def test_interp(self, unit, error): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_interp_like(self, unit, error, dtype): - array1 = ( - np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) - - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.m, - } + @pytest.mark.parametrize("func", (method("interp"), method("reindex")), ids=repr) + def test_interp_reindex_indexing(self, func, unit, error, dtype): + array1 = np.linspace(-1, 0, 10).astype(dtype) + array2 = np.linspace(0, 1, 10).astype(dtype) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "z")), - }, - coords=coords, - ) + x = np.arange(10) * unit_registry.m + new_x = (np.arange(8) + 0.5) * unit - other = xr.Dataset( - data_vars={ - "c": xr.DataArray(data=np.empty((20, 
10)), dims=("x", "y")), - "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")), - }, - coords={ - "x": (np.arange(20) + 0.3) * unit, - "y": (np.arange(10) - 0.2) * unit, - "z": (np.arange(15) + 0.4) * unit, - }, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) + units = extract_units(ds) if error is not None: with pytest.raises(error): - ds.interp_like(other) + func(ds, x=new_x) return - units = extract_units(ds) - expected = attach_units( - strip_units(ds).interp_like(strip_units(convert_units(other, units))), units - ) - actual = ds.interp_like(other) + expected = attach_units(func(strip_units(ds), x=new_x), units) + actual = func(ds, x=new_x) - assert_equal_with_units(actual, expected) + assert_units_equal(expected, actual) + assert_equal(expected, actual) - @pytest.mark.xfail(reason="indexes don't support units") + @pytest.mark.parametrize("variant", ("data", "coords")) @pytest.mark.parametrize( - "unit,error", + "func", ( - pytest.param(1, DimensionalityError, id="no_unit"), pytest.param( - unit_registry.dimensionless, DimensionalityError, id="dimensionless" + method("interp_like"), marks=pytest.mark.xfail(reason="uses scipy") ), - pytest.param(unit_registry.s, DimensionalityError, id="incompatible_unit"), - pytest.param(unit_registry.cm, None, id="compatible_unit"), - pytest.param(unit_registry.m, None, id="identical_unit"), + method("reindex_like"), ), + ids=repr, ) - def test_reindex(self, unit, error, dtype): - array1 = ( - np.linspace(1, 2, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(1, 2, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) - - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.s, + def test_interp_reindex_like(self, func, variant, dtype): + variants = { + "data": (unit_registry.m, 1), + "coords": (1, unit_registry.m), } + data_unit, coord_unit = variants.get(variant) - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "z")), - }, - coords=coords, - ) - - new_coords = (np.arange(10) + 0.5) * unit + array1 = np.linspace(-1, 0, 10).astype(dtype) * data_unit + array2 = np.linspace(0, 1, 10).astype(dtype) * data_unit - if error is not None: - with pytest.raises(error): - ds.reindex(x=new_coords) + y = np.arange(10) * coord_unit - return + x = np.arange(10) + new_x = np.arange(8) + 0.5 - expected = attach_units( - strip_units(ds).reindex( - x=strip_units(convert_units(new_coords, {None: coords["x"].units})) - ), - extract_units(ds), + ds = xr.Dataset( + {"a": ("x", array1), "b": ("x", array2)}, coords={"x": x, "y": ("x", y)} ) - actual = ds.reindex(x=new_coords) + units = extract_units(ds) + + other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x}) - assert_equal_with_units(actual, expected) + expected = attach_units(func(strip_units(ds), other), units) + actual = func(ds, other) + + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.xfail(reason="indexes don't support units") @pytest.mark.parametrize( @@ -5063,54 +4921,32 @@ def test_reindex(self, unit, error, dtype): pytest.param(unit_registry.m, None, id="identical_unit"), ), ) - def test_reindex_like(self, unit, error, dtype): - array1 = ( - np.linspace(0, 10, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) + 
@pytest.mark.parametrize( + "func", (method("interp_like"), method("reindex_like")), ids=repr + ) + def test_interp_reindex_like_indexing(self, func, unit, error, dtype): + array1 = np.linspace(-1, 0, 10).astype(dtype) + array2 = np.linspace(0, 1, 10).astype(dtype) - coords = { - "x": np.arange(10) * unit_registry.m, - "y": np.arange(5) * unit_registry.m, - "z": np.arange(8) * unit_registry.m, - } + x = np.arange(10) * unit_registry.m + new_x = (np.arange(8) + 0.5) * unit - ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "z")), - }, - coords=coords, - ) + ds = xr.Dataset({"a": ("x", array1), "b": ("x", array2)}, coords={"x": x}) + units = extract_units(ds) - other = xr.Dataset( - data_vars={ - "c": xr.DataArray(data=np.empty((20, 10)), dims=("x", "y")), - "d": xr.DataArray(data=np.empty((20, 15)), dims=("x", "z")), - }, - coords={ - "x": (np.arange(20) + 0.3) * unit, - "y": (np.arange(10) - 0.2) * unit, - "z": (np.arange(15) + 0.4) * unit, - }, - ) + other = xr.Dataset({"a": ("x", np.empty_like(new_x))}, coords={"x": new_x}) if error is not None: with pytest.raises(error): - ds.reindex_like(other) + func(ds, other) return - units = extract_units(ds) - expected = attach_units( - strip_units(ds).reindex_like(strip_units(convert_units(other, units))), - units, - ) - actual = ds.reindex_like(other) + expected = attach_units(func(strip_units(ds), other), units) + actual = func(ds, other) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", @@ -5120,30 +4956,46 @@ def test_reindex_like(self, unit, error, dtype): method("integrate", coord="x"), pytest.param( method("quantile", q=[0.25, 0.75]), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) <= "0.12", + reason="nanquantile not implemented yet", + ), ), method("reduce", func=np.sum, dim="x"), method("map", np.fabs), ), ids=repr, ) - def test_computation(self, func, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) - x = np.arange(10) * unit_registry.m - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_computation(self, func, variant, dtype): + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit1, unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2 + x = np.arange(4) * dim_unit + y = np.arange(5) * dim_unit + z = np.arange(3) * dim_unit ds = xr.Dataset( data_vars={ "a": xr.DataArray(data=array1, dims=("x", "y")), "b": xr.DataArray(data=array2, dims=("x", "z")), }, - coords={"x": x, "y": y, "z": z}, + coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)}, ) units = extract_units(ds) @@ -5151,69 +5003,105 @@ def test_computation(self, func, dtype): expected = attach_units(func(strip_units(ds)), units) actual = func(ds) - assert_equal_with_units(expected, actual) + 
assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("groupby", "x"), - method("groupby_bins", "x", bins=4), + pytest.param( + method("groupby_bins", "x", bins=2), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) <= "0.12", + reason="needs assert_allclose but that does not work with pint", + ), + ), method("coarsen", x=2), pytest.param( method("rolling", x=3), marks=pytest.mark.xfail(reason="strips units") ), pytest.param( method("rolling_exp", x=3), - marks=pytest.mark.xfail(reason="uses numbagg which strips units"), + marks=pytest.mark.xfail( + reason="numbagg functions are not supported by pint" + ), ), ), ids=repr, ) - def test_computation_objects(self, func, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) - * unit_registry.Pa - ) - x = np.arange(10) * unit_registry.m - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_computation_objects(self, func, variant, dtype): + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit1, unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(-5, 5, 4 * 5).reshape(4, 5).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 4 * 3).reshape(4, 3).astype(dtype) * unit2 + x = np.arange(4) * dim_unit + y = np.arange(5) * dim_unit + z = np.arange(3) * dim_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z")), - }, - coords={"x": x, "y": y, "z": z}, + data_vars={"a": (("x", "y"), array1), "b": (("x", "z"), array2)}, + coords={"x": x, "y": y, "z": z, "y2": ("y", np.arange(5) * coord_unit)}, ) units = extract_units(ds) args = [] if func.name != "groupby" else ["y"] - reduce_func = method("mean", *args) - expected = attach_units(reduce_func(func(strip_units(ds))), units) - actual = reduce_func(func(ds)) + expected = attach_units(func(strip_units(ds)).mean(*args), units) + actual = func(ds).mean(*args) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + # TODO: remove once pint 0.12 has been released + if LooseVersion(pint.__version__) <= "0.12": + assert_equal(expected, actual) + else: + assert_allclose(expected, actual) + + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_resample(self, variant, dtype): + # TODO: move this to test_computation_objects + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit1, unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit2 - def test_resample(self, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 8).reshape(10, 8).astype(dtype) * unit_registry.Pa - ) t = pd.date_range("10-09-2010", 
periods=array1.shape[0], freq="1y") - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + y = np.arange(5) * dim_unit + z = np.arange(8) * dim_unit + + u = np.linspace(-1, 0, 5) * coord_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("time", "y")), - "b": xr.DataArray(data=array2, dims=("time", "z")), - }, - coords={"time": t, "y": y, "z": z}, + data_vars={"a": (("time", "y"), array1), "b": (("time", "z"), array2)}, + coords={"time": t, "y": y, "z": z, "u": ("y", u)}, ) units = extract_units(ds) @@ -5222,43 +5110,59 @@ def test_resample(self, dtype): expected = attach_units(func(strip_units(ds)).mean(), units) actual = func(ds).mean() - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("assign", c=lambda ds: 10 * ds.b), - method("assign_coords", v=("x", np.arange(10) * unit_registry.s)), + method("assign_coords", v=("x", np.arange(5) * unit_registry.s)), method("first"), method("last"), pytest.param( method("quantile", q=[0.25, 0.5, 0.75], dim="x"), - marks=pytest.mark.xfail(reason="nanquantile not implemented"), + marks=pytest.mark.xfail( + LooseVersion(pint.__version__) <= "0.12", + reason="nanquantile not implemented", + ), ), ), ids=repr, ) - def test_grouped_operations(self, func, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) * unit_registry.degK - ) - array2 = ( - np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) - * unit_registry.Pa - ) - x = np.arange(10) * unit_registry.m - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_grouped_operations(self, func, variant, dtype): + variants = { + "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), + "dims": ((1, 1), unit_registry.m, 1), + "coords": ((1, 1), 1, unit_registry.m), + } + (unit1, unit2), dim_unit, coord_unit = variants.get(variant) + + array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2 + x = np.arange(5) * dim_unit + y = np.arange(4) * dim_unit + z = np.arange(3) * dim_unit + + u = np.linspace(-1, 0, 4) * coord_unit ds = xr.Dataset( - data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z")), - }, - coords={"x": x, "y": y, "z": z}, + data_vars={"a": (("x", "y"), array1), "b": (("x", "y", "z"), array2)}, + coords={"x": x, "y": y, "z": z, "u": ("y", u)}, ) - units = extract_units(ds) - units.update({"c": unit_registry.Pa, "v": unit_registry.s}) + + assigned_units = {"c": unit2, "v": unit_registry.s} + units = merge_mappings(extract_units(ds), assigned_units) stripped_kwargs = { name: strip_units(value) for name, value in func.kwargs.items() @@ -5268,20 +5172,26 @@ def test_grouped_operations(self, func, dtype): ) actual = func(ds.groupby("y")) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "func", ( method("pipe", lambda ds: ds * 10), method("assign", d=lambda ds: ds.b * 10), - method("assign_coords", y2=("y", np.arange(5) * unit_registry.mm)), + method("assign_coords", y2=("y", np.arange(4) * unit_registry.mm)), method("assign_attrs", attr1="value"), method("rename", 
x2="x_mm"), method("rename_vars", c="temperature"), method("rename_dims", x="offset_x"), - method("swap_dims", {"x": "x2"}), - method("expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1), + method("swap_dims", {"x": "u"}), + pytest.param( + method( + "expand_dims", v=np.linspace(10, 20, 12) * unit_registry.s, axis=1 + ), + marks=pytest.mark.xfail(reason="indexes don't support units"), + ), method("drop_vars", "x"), method("drop_dims", "z"), method("set_coords", names="c"), @@ -5290,40 +5200,55 @@ def test_grouped_operations(self, func, dtype): ), ids=repr, ) - def test_content_manipulation(self, func, dtype): - array1 = ( - np.linspace(-5, 5, 10 * 5).reshape(10, 5).astype(dtype) - * unit_registry.m ** 3 - ) - array2 = ( - np.linspace(10, 20, 10 * 5 * 8).reshape(10, 5, 8).astype(dtype) - * unit_registry.Pa - ) - array3 = np.linspace(0, 10, 10).astype(dtype) * unit_registry.degK + @pytest.mark.parametrize( + "variant", + ( + "data", + pytest.param( + "dims", marks=pytest.mark.xfail(reason="indexes don't support units") + ), + "coords", + ), + ) + def test_content_manipulation(self, func, variant, dtype): + variants = { + "data": ( + (unit_registry.m ** 3, unit_registry.Pa, unit_registry.degK), + 1, + 1, + ), + "dims": ((1, 1, 1), unit_registry.m, 1), + "coords": ((1, 1, 1), 1, unit_registry.m), + } + (unit1, unit2, unit3), dim_unit, coord_unit = variants.get(variant) - x = np.arange(10) * unit_registry.m - x2 = x.to(unit_registry.mm) - y = np.arange(5) * unit_registry.m - z = np.arange(8) * unit_registry.m + array1 = np.linspace(-5, 5, 5 * 4).reshape(5, 4).astype(dtype) * unit1 + array2 = np.linspace(10, 20, 5 * 4 * 3).reshape(5, 4, 3).astype(dtype) * unit2 + array3 = np.linspace(0, 10, 5).astype(dtype) * unit3 + + x = np.arange(5) * dim_unit + y = np.arange(4) * dim_unit + z = np.arange(3) * dim_unit + + x2 = np.linspace(-1, 0, 5) * coord_unit ds = xr.Dataset( data_vars={ - "a": xr.DataArray(data=array1, dims=("x", "y")), - "b": xr.DataArray(data=array2, dims=("x", "y", "z")), - "c": xr.DataArray(data=array3, dims="x"), + "a": (("x", "y"), array1), + "b": (("x", "y", "z"), array2), + "c": ("x", array3), }, coords={"x": x, "y": y, "z": z, "x2": ("x", x2)}, ) - units = { - **extract_units(ds), - **{ - "y2": unit_registry.mm, - "x_mm": unit_registry.mm, - "offset_x": unit_registry.m, - "d": unit_registry.Pa, - "temperature": unit_registry.degK, - }, + + new_units = { + "y2": unit_registry.mm, + "x_mm": coord_unit, + "offset_x": unit_registry.m, + "d": unit2, + "temperature": unit3, } + units = merge_mappings(extract_units(ds), new_units) stripped_kwargs = { key: strip_units(value) for key, value in func.kwargs.items() @@ -5331,7 +5256,8 @@ def test_content_manipulation(self, func, dtype): expected = attach_units(func(strip_units(ds), **stripped_kwargs), units) actual = func(ds) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) @pytest.mark.parametrize( "unit,error", @@ -5356,25 +5282,29 @@ def test_content_manipulation(self, func, dtype): ), ) def test_merge(self, variant, unit, error, dtype): - original_data_unit = unit_registry.m - original_dim_unit = unit_registry.m - original_coord_unit = unit_registry.m + left_variants = { + "data": (unit_registry.m, 1, 1), + "dims": (1, unit_registry.m, 1), + "coords": (1, 1, unit_registry.m), + } - variants = { - "data": (unit, original_dim_unit, original_coord_unit), - "dims": (original_data_unit, unit, original_coord_unit), - "coords": (original_data_unit, 
original_dim_unit, unit), + left_data_unit, left_dim_unit, left_coord_unit = left_variants.get(variant) + + right_variants = { + "data": (unit, 1, 1), + "dims": (1, unit, 1), + "coords": (1, 1, unit), } - data_unit, dim_unit, coord_unit = variants.get(variant) + right_data_unit, right_dim_unit, right_coord_unit = right_variants.get(variant) - left_array = np.arange(10).astype(dtype) * original_data_unit - right_array = np.arange(-5, 5).astype(dtype) * data_unit + left_array = np.arange(10).astype(dtype) * left_data_unit + right_array = np.arange(-5, 5).astype(dtype) * right_data_unit - left_dim = np.arange(10, 20) * original_dim_unit - right_dim = np.arange(5, 15) * dim_unit + left_dim = np.arange(10, 20) * left_dim_unit + right_dim = np.arange(5, 15) * right_dim_unit - left_coord = np.arange(-10, 0) * original_coord_unit - right_coord = np.arange(-15, -5) * coord_unit + left_coord = np.arange(-10, 0) * left_coord_unit + right_coord = np.arange(-15, -5) * right_coord_unit left = xr.Dataset( data_vars={"a": ("x", left_array)}, @@ -5397,4 +5327,5 @@ def test_merge(self, variant, unit, error, dtype): expected = attach_units(strip_units(left).merge(strip_units(converted)), units) actual = left.merge(right) - assert_equal_with_units(expected, actual) + assert_units_equal(expected, actual) + assert_equal(expected, actual) From b9e6a36ff7a0ca3593165cf191f4152666fa4a66 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 17 Jun 2020 22:45:10 -0700 Subject: [PATCH 56/71] Revise pull request template (#4039) * Revise pull request template See below for the new language, to clarify that documentation is only necessary for "user visible changes." I added "including notable bug fixes" to indicate that minor bug fixes may not be worth noting (I was thinking of test-suite only fixes in this category) but perhaps that is too confusing. * remove line break * Update releasing notes --- .github/PULL_REQUEST_TEMPLATE.md | 3 +- HOW_TO_RELEASE.md | 53 ++++++++++++++++---------------- 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index a921bddaa23..c9c0b720c35 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -3,4 +3,5 @@ - [ ] Closes #xxxx - [ ] Tests added - [ ] Passes `isort -rc . && black . && mypy . && flake8` - - [ ] Fully documented, including `whats-new.rst` for all changes and `api.rst` for new API + - [ ] User visible changes (including notable bug fixes) are documented in `whats-new.rst` + - [ ] New functions/methods are listed in `api.rst` diff --git a/HOW_TO_RELEASE.md b/HOW_TO_RELEASE.md index 3fdd1d7236d..c890d61d966 100644 --- a/HOW_TO_RELEASE.md +++ b/HOW_TO_RELEASE.md @@ -1,4 +1,4 @@ -How to issue an xarray release in 16 easy steps +# How to issue an xarray release in 17 easy steps Time required: about an hour. @@ -6,7 +6,16 @@ Time required: about an hour. ``` git pull upstream master ``` - 2. Look over whats-new.rst and the docs. Make sure "What's New" is complete + 2. Get a list of contributors with: + ``` + git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." --format=%aN | sort -u | perl -pe 's/\n/$1, /' + ``` + or by substituting the _previous_ release in: + ``` + git log v0.X.Y-1.. --format=%aN | sort -u | perl -pe 's/\n/$1, /' + ``` + Add these into `whats-new.rst` somewhere :) + 3. Look over whats-new.rst and the docs. Make sure "What's New" is complete (check the date!) and consider adding a brief summary note describing the release at the top. 
Things to watch out for: @@ -16,41 +25,41 @@ Time required: about an hour. due to a bad merge. Check for these before a release by using git diff, e.g., `git diff v0.X.Y whats-new.rst` where 0.X.Y is the previous release. - 3. If you have any doubts, run the full test suite one final time! + 4. If you have any doubts, run the full test suite one final time! ``` pytest ``` - 4. Check that the ReadTheDocs build is passing. - 5. On the master branch, commit the release in git: + 5. Check that the ReadTheDocs build is passing. + 6. On the master branch, commit the release in git: ``` git commit -am 'Release v0.X.Y' ``` - 6. Tag the release: + 7. Tag the release: ``` git tag -a v0.X.Y -m 'v0.X.Y' ``` - 7. Build source and binary wheels for pypi: + 8. Build source and binary wheels for pypi: ``` git clean -xdf # this deletes all uncommited changes! python setup.py bdist_wheel sdist ``` - 8. Use twine to check the package build: + 9. Use twine to check the package build: ``` twine check dist/xarray-0.X.Y* ``` - 9. Use twine to register and upload the release on pypi. Be careful, you can't +10. Use twine to register and upload the release on pypi. Be careful, you can't take this back! ``` twine upload dist/xarray-0.X.Y* ``` You will need to be listed as a package owner at https://pypi.python.org/pypi/xarray for this to work. -10. Push your changes to master: +11. Push your changes to master: ``` git push upstream master git push upstream --tags ``` -11. Update the stable branch (used by ReadTheDocs) and switch back to master: +12. Update the stable branch (used by ReadTheDocs) and switch back to master: ``` git checkout stable git rebase master @@ -60,7 +69,7 @@ Time required: about an hour. It's OK to force push to 'stable' if necessary. (We also update the stable branch with `git cherrypick` for documentation only fixes that apply the current released version.) -12. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: +13. Add a section for the next release (v.X.Y+1) to doc/whats-new.rst: ``` .. _whats-new.0.X.Y+1: @@ -86,19 +95,19 @@ Time required: about an hour. Internal Changes ~~~~~~~~~~~~~~~~ ``` -13. Commit your changes and push to master again: +14. Commit your changes and push to master again: ``` git commit -am 'New whatsnew section' git push upstream master ``` You're done pushing to master! -14. Issue the release on GitHub. Click on "Draft a new release" at +15. Issue the release on GitHub. Click on "Draft a new release" at https://github.com/pydata/xarray/releases. Type in the version number, but don't bother to describe it -- we maintain that on the docs instead. -15. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ +16. Update the docs. Login to https://readthedocs.org/projects/xray/versions/ and switch your new release tag (at the bottom) from "Inactive" to "Active". It should now build automatically. -16. Issue the release announcement! For bug fix releases, I usually only email +17. Issue the release announcement! For bug fix releases, I usually only email xarray@googlegroups.com. For major/feature releases, I will email a broader list (no more than once every 3-6 months): - pydata@googlegroups.com @@ -109,18 +118,8 @@ Time required: about an hour. Google search will turn up examples of prior release announcements (look for "ANN xarray"). - You can get a list of contributors with: - ``` - git log "$(git tag --sort="v:refname" | sed -n 'x;$p').." 
--format="%aN" | sort -u - ``` - or by substituting the _previous_ release in: - ``` - git log v0.X.Y-1.. --format="%aN" | sort -u - ``` - NB: copying this output into a Google Groups form can cause - [issues](https://groups.google.com/forum/#!topic/xarray/hK158wAviPs) with line breaks, so take care -Note on version numbering: +## Note on version numbering We follow a rough approximation of semantic version. Only major releases (0.X.0) should include breaking changes. Minor releases (0.X.Y) are for bug fixes and From 2a8cd3b0545851cff2773d493e30d5c84aa1c4db Mon Sep 17 00:00:00 2001 From: keewis Date: Tue, 23 Jun 2020 00:51:56 +0200 Subject: [PATCH 57/71] use builtin python types instead of the numpy alias (#4170) * replace np.bool with the python type * replace np.int with the python type * replace np.complex with the builtin python type * replace np.float with the builtin python type --- xarray/coding/times.py | 4 ++-- xarray/conventions.py | 2 +- xarray/core/common.py | 2 +- xarray/core/formatting.py | 2 +- xarray/tests/test_backends.py | 2 +- xarray/tests/test_conventions.py | 6 ++---- xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_dataset.py | 10 +++++----- xarray/tests/test_dtypes.py | 4 ++-- xarray/tests/test_plot.py | 4 ++-- 10 files changed, 18 insertions(+), 20 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index dafa8ca03b1..77b2d2c7937 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -158,7 +158,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) except (KeyError, OutOfBoundsDatetime, OverflowError): dates = _decode_datetime_with_cftime( - flat_num_dates.astype(np.float), units, calendar + flat_num_dates.astype(float), units, calendar ) if ( @@ -179,7 +179,7 @@ def decode_cf_datetime(num_dates, units, calendar=None, use_cftime=None): dates = cftime_to_nptime(dates) elif use_cftime: dates = _decode_datetime_with_cftime( - flat_num_dates.astype(np.float), units, calendar + flat_num_dates.astype(float), units, calendar ) else: dates = _decode_datetime_with_pandas(flat_num_dates, units, calendar) diff --git a/xarray/conventions.py b/xarray/conventions.py index 588fcea71a3..fc0572944f3 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -116,7 +116,7 @@ def maybe_default_fill_value(var): def maybe_encode_bools(var): if ( - (var.dtype == np.bool) + (var.dtype == bool) and ("dtype" not in var.encoding) and ("dtype" not in var.attrs) ): diff --git a/xarray/core/common.py b/xarray/core/common.py index e343f342040..f759f4c32dd 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1481,7 +1481,7 @@ def zeros_like(other, dtype: DTypeLike = None): * lat (lat) int64 1 2 * lon (lon) int64 0 1 2 - >>> xr.zeros_like(x, dtype=np.float) + >>> xr.zeros_like(x, dtype=float) array([[0., 0., 0.], [0., 0., 0.]]) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index bd9576a4440..3a9dd772a9f 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -140,7 +140,7 @@ def format_item(x, timedelta_format=None, quote_strings=True): return format_timedelta(x, timedelta_format=timedelta_format) elif isinstance(x, (str, bytes)): return repr(x) if quote_strings else x - elif isinstance(x, (float, np.float)): + elif isinstance(x, (float, np.float_)): return f"{x:.4}" else: return str(x) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 3642c1eb9b7..177435fa864 100644 --- 
a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -885,7 +885,7 @@ def test_roundtrip_endian(self): "x": np.arange(3, 10, dtype=">i2"), "y": np.arange(3, 20, dtype=" Date: Wed, 24 Jun 2020 16:41:05 +0200 Subject: [PATCH 58/71] Proposal for better error message about in-place operation (#3976) * Improve error message: automatic alignment during in-place operation. * Sorted imports. * Fix tests. * Add suggestions from S. Hoyer. --- xarray/core/dataarray.py | 13 ++++++++++--- xarray/tests/test_dataarray.py | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 5814c828663..b0df874953b 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -53,7 +53,7 @@ from .formatting import format_item from .indexes import Indexes, default_indexes, propagate_indexes from .indexing import is_fancy_indexer -from .merge import PANDAS_TYPES, _extract_indexes_from_coords +from .merge import PANDAS_TYPES, MergeError, _extract_indexes_from_coords from .options import OPTIONS from .utils import Default, ReprObject, _check_inplace, _default, either_dict_or_kwargs from .variable import ( @@ -2713,8 +2713,15 @@ def func(self, other): # don't support automatic alignment with in-place arithmetic. other_coords = getattr(other, "coords", None) other_variable = getattr(other, "variable", other) - with self.coords._merge_inplace(other_coords): - f(self.variable, other_variable) + try: + with self.coords._merge_inplace(other_coords): + f(self.variable, other_variable) + except MergeError as exc: + raise MergeError( + "Automatic alignment is not supported for in-place operations.\n" + "Consider aligning the indices manually or using a not-in-place operation.\n" + "See https://github.com/pydata/xarray/issues/3910 for more explanations." + ) from exc return self return func diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 36bee63bf3b..8fc37ac458d 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1930,9 +1930,9 @@ def test_inplace_math_basics(self): def test_inplace_math_automatic_alignment(self): a = DataArray(range(5), [("x", range(5))]) b = DataArray(range(1, 6), [("x", range(1, 6))]) - with pytest.raises(xr.MergeError): + with pytest.raises(xr.MergeError, match="Automatic alignment is not supported"): a += b - with pytest.raises(xr.MergeError): + with pytest.raises(xr.MergeError, match="Automatic alignment is not supported"): b += a def test_math_name(self): From a2dac231cd946893d9fc51219b0c053e04fa7fb7 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Wed, 24 Jun 2020 08:44:59 -0700 Subject: [PATCH 59/71] Remove
 <pre> from nested HTML repr (#4171)
    
Using `<pre>` messes up the display of nested HTML reprs, e.g., from dask. Now
we only use the `<pre>` tag when displaying text.
    ---
     xarray/core/formatting_html.py       | 8 +++++---
     xarray/tests/test_formatting_html.py | 2 +-
     2 files changed, 6 insertions(+), 4 deletions(-)
    
    diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
    index 69832d6ca3d..c99683e91c7 100644
    --- a/xarray/core/formatting_html.py
    +++ b/xarray/core/formatting_html.py
    @@ -20,7 +20,9 @@ def short_data_repr_html(array):
         internal_data = getattr(array, "variable", array)._data
         if hasattr(internal_data, "_repr_html_"):
             return internal_data._repr_html_()
    -    return escape(short_data_repr(array))
    +    else:
    +        text = escape(short_data_repr(array))
+        return f"<pre>{text}</pre>"
 
 
 def format_dims(dims, coord_names):
@@ -123,7 +125,7 @@ def summarize_variable(name, var, is_index=False, dtype=None, preview=None):
         f"<div class='xr-var-attrs'>{attrs_ul}</div>"
-        f"<pre class='xr-var-data'>{data_repr}</pre>"
+        f"<div class='xr-var-data'>{data_repr}</div>"
     )
@@ -193,7 +195,7 @@ def array_section(obj):
         f"<div class='xr-array-preview xr-preview'><span>{preview}</span></div>"
-        f"<pre class='xr-array-data'>{data_repr}</pre>"
+        f"<div class='xr-array-data'>{data_repr}</div>"
         "</div>"
     )
diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py
index 90e74f1f78f..ea636403318 100644
--- a/xarray/tests/test_formatting_html.py
+++ b/xarray/tests/test_formatting_html.py
@@ -48,7 +48,7 @@ def dataset():
 
 def test_short_data_repr_html(dataarray):
     data_repr = fh.short_data_repr_html(dataarray)
-    assert data_repr.startswith("array")
+    assert data_repr.startswith("<pre>array")
     
     
     def test_short_data_repr_html_non_str_keys(dataset):
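
For readers skimming the patch above: the behaviour it settles on can be sketched as below. This is a simplified, hypothetical stand-in (the function name and fallback logic are illustrative, not the library's exact code); the point is that nested HTML reprs pass through untouched while plain-text reprs get wrapped in `<pre>`.

```python
from html import escape

def short_data_repr_html_sketch(data) -> str:
    # If the object already knows how to render itself as HTML (e.g. a dask
    # array), return that directly -- wrapping it in <pre> would break layout.
    if hasattr(data, "_repr_html_"):
        return data._repr_html_()
    # Otherwise fall back to escaped text wrapped in <pre>, which preserves
    # whitespace for plain-text reprs.
    return f"<pre>{escape(repr(data))}</pre>"

print(short_data_repr_html_sketch([1, 2, 3]))  # <pre>[1, 2, 3]</pre>
```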
    
    From f281b3b62712079605d0f873c2f38623212bdef0 Mon Sep 17 00:00:00 2001
    From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com>
    Date: Wed, 24 Jun 2020 12:04:11 -0400
    Subject: [PATCH 60/71] Limit length of dataarray reprs (#3905)
    
    * limit length of dataarray reprs
    
    * repr depends on numpy versions
    
    * whatsnew
    
    * correct comment based on @keewis comment
    
    * Update whats-new.rst
    
    Co-authored-by: Deepak Cherian 
    ---
     doc/whats-new.rst               |  4 +++-
     xarray/core/formatting.py       | 15 +++++++++++++--
     xarray/tests/test_formatting.py | 13 +++++++++++--
     3 files changed, 27 insertions(+), 5 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 4b5bb1e491f..ea3e32d3a80 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -66,6 +66,9 @@ New Features
     - Limited the length of array items with long string reprs to a
       reasonable width (:pull:`3900`)
       By `Maximilian Roos `_
+- Limited the number of lines shown for large arrays when their numpy repr would otherwise have more than 40 lines.
    +  (:pull:`3905`)
    +  By `Maximilian Roos `_
     - Implement :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
       :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:issue:`60`, :pull:`3871`)
       By `Todd Jennings `_
    @@ -96,7 +99,6 @@ New Features
       By `Deepak Cherian `_
     - :py:meth:`map_blocks` can now handle dask-backed xarray objects in ``args``. (:pull:`3818`)
       By `Deepak Cherian `_
    -
     - Add keyword ``decode_timedelta`` to :py:func:`xarray.open_dataset`,
       (:py:func:`xarray.open_dataarray`, :py:func:`xarray.open_dataarray`,
       :py:func:`xarray.decode_cf`) that allows to disable/enable the decoding of timedeltas
    diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py
    index 3a9dd772a9f..28eaae5f05b 100644
    --- a/xarray/core/formatting.py
    +++ b/xarray/core/formatting.py
    @@ -3,7 +3,7 @@
     import contextlib
     import functools
     from datetime import datetime, timedelta
    -from itertools import zip_longest
    +from itertools import chain, zip_longest
     from typing import Hashable
     
     import numpy as np
    @@ -422,6 +422,17 @@ def set_numpy_options(*args, **kwargs):
             np.set_printoptions(**original)
     
     
    +def limit_lines(string: str, *, limit: int):
    +    """
    +    If the string is more lines than the limit,
    +    this returns the middle lines replaced by an ellipsis
    +    """
    +    lines = string.splitlines()
    +    if len(lines) > limit:
    +        string = "\n".join(chain(lines[: limit // 2], ["..."], lines[-limit // 2 :]))
    +    return string
    +
    +
     def short_numpy_repr(array):
         array = np.asarray(array)
     
    @@ -447,7 +458,7 @@ def short_data_repr(array):
         elif hasattr(internal_data, "__array_function__") or isinstance(
             internal_data, dask_array_type
         ):
    -        return repr(array.data)
    +        return limit_lines(repr(array.data), limit=40)
         elif array._in_memory or array.size < 1e5:
             return short_numpy_repr(array)
         else:
    diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py
    index 6881c0bc0ff..82de8080c80 100644
    --- a/xarray/tests/test_formatting.py
    +++ b/xarray/tests/test_formatting.py
    @@ -405,10 +405,19 @@ def test_short_numpy_repr():
             np.random.randn(20, 20),
             np.random.randn(5, 10, 15),
             np.random.randn(5, 10, 15, 3),
    +        np.random.randn(100, 5, 1),
         ]
         # number of lines:
    -    # for default numpy repr: 167, 140, 254, 248
    -    # for short_numpy_repr: 1, 7, 24, 19
    +    # for default numpy repr: 167, 140, 254, 248, 599
    +    # for short_numpy_repr: 1, 7, 24, 19, 25
         for array in cases:
             num_lines = formatting.short_numpy_repr(array).count("\n") + 1
             assert num_lines < 30
    +
    +
    +def test_large_array_repr_length():
    +
    +    da = xr.DataArray(np.random.randn(100, 5, 1))
    +
    +    result = repr(da).splitlines()
    +    assert len(result) < 50
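
To see the truncation added above in isolation, the `limit_lines` helper from the diff can be exercised on its own. The helper body below is copied from the patch; the driver code around it is only an illustration.

```python
from itertools import chain

def limit_lines(string: str, *, limit: int) -> str:
    # Keep the first and last limit//2 lines; replace the middle with "...".
    lines = string.splitlines()
    if len(lines) > limit:
        string = "\n".join(chain(lines[: limit // 2], ["..."], lines[-limit // 2 :]))
    return string

text = "\n".join(str(i) for i in range(100))
print(limit_lines(text, limit=6))
# Prints seven lines: 0, 1, 2, then "...", then 97, 98, 99.
```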
    
    From 24d755d59421fd0eaf22ad109408275d2bfb8216 Mon Sep 17 00:00:00 2001
    From: johnomotani 
    Date: Wed, 24 Jun 2020 19:22:18 +0100
    Subject: [PATCH 61/71] Fix 4009 (#4173)
    
    * Test attrs handling in open_mfdataset
    
    * Fix attrs handling in open_mfdataset()
    
    Need to pass combine_attrs="drop", to allow attrs_file to set the attrs.
    
    * Update whats-new.rst
    
    * Update doc/whats-new.rst
    
    Co-authored-by: Deepak Cherian 
    ---
     doc/whats-new.rst             |  2 ++
     xarray/backends/api.py        |  8 +++++++-
     xarray/tests/test_backends.py | 30 ++++++++++++++++++++++++++++++
     3 files changed, 39 insertions(+), 1 deletion(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index ea3e32d3a80..bf57f5e951d 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -107,6 +107,8 @@ New Features
     
     Bug fixes
     ~~~~~~~~~
    +- Fix errors combining attrs in :py:func:`open_mfdataset` (:issue:`4009`, :pull:`4173`)
    +  By `John Omotani `_
     - If groupby receives a ``DataArray`` with name=None, assign a default name (:issue:`158`)
       By `Phil Butcher `_.
     - Support dark mode in VS code (:issue:`4024`)
    diff --git a/xarray/backends/api.py b/xarray/backends/api.py
    index 0919d2a582b..4077d7a02c8 100644
    --- a/xarray/backends/api.py
    +++ b/xarray/backends/api.py
    @@ -967,12 +967,18 @@ def open_mfdataset(
                     coords=coords,
                     ids=ids,
                     join=join,
    +                combine_attrs="drop",
                 )
             elif combine == "by_coords":
                 # Redo ordering from coordinates, ignoring how they were ordered
                 # previously
                 combined = combine_by_coords(
    -                datasets, compat=compat, data_vars=data_vars, coords=coords, join=join
    +                datasets,
    +                compat=compat,
    +                data_vars=data_vars,
    +                coords=coords,
    +                join=join,
    +                combine_attrs="drop",
                 )
             else:
                 raise ValueError(
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 177435fa864..1e33eccb83e 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -2662,6 +2662,36 @@ def test_open_mfdataset_does_same_as_concat(self, combine, opt, join):
                     ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join)
                     assert_identical(ds, ds_expect)
     
    +    def test_open_mfdataset_dataset_attr_by_coords(self):
    +        """
    +        Case when an attribute differs across the multiple files
    +        """
    +        with self.setup_files_and_datasets() as (files, [ds1, ds2]):
    +            # Give the files an inconsistent attribute
    +            for i, f in enumerate(files):
    +                ds = open_dataset(f).load()
    +                ds.attrs["test_dataset_attr"] = 10 + i
    +                ds.close()
    +                ds.to_netcdf(f)
    +
    +            with xr.open_mfdataset(files, combine="by_coords", concat_dim="t") as ds:
    +                assert ds.test_dataset_attr == 10
    +
    +    def test_open_mfdataset_dataarray_attr_by_coords(self):
    +        """
    +        Case when an attribute of a member DataArray differs across the multiple files
    +        """
    +        with self.setup_files_and_datasets() as (files, [ds1, ds2]):
    +            # Give the files an inconsistent attribute
    +            for i, f in enumerate(files):
    +                ds = open_dataset(f).load()
    +                ds["v1"].attrs["test_dataarray_attr"] = i
    +                ds.close()
    +                ds.to_netcdf(f)
    +
    +            with xr.open_mfdataset(files, combine="by_coords", concat_dim="t") as ds:
    +                assert ds["v1"].test_dataarray_attr == 0
    +
         @pytest.mark.parametrize("combine", ["nested", "by_coords"])
         @pytest.mark.parametrize("opt", ["all", "minimal", "different"])
         def test_open_mfdataset_exact_join_raises_error(self, combine, opt):
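
A hedged usage sketch of what the fix above enables (the file names are hypothetical; `attrs_file` is the existing `open_mfdataset` argument that selects which file supplies the global attributes):

```python
import xarray as xr

# Member files whose global attrs differ no longer break combining; the
# combined dataset takes its global attrs from attrs_file (by default, from
# the first file in the list).
ds = xr.open_mfdataset(
    ["obs_2019.nc", "obs_2020.nc"],  # hypothetical paths
    combine="by_coords",
    attrs_file="obs_2020.nc",
)
print(ds.attrs)
```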
    
    From 3088de25987f6863ba6c7a73b23a7ca7a8c93a69 Mon Sep 17 00:00:00 2001
    From: Tom Nicholas <35968931+TomNicholas@users.noreply.github.com>
    Date: Wed, 24 Jun 2020 19:22:54 +0100
    Subject: [PATCH 62/71] Remove old auto combine (#3926)
    
    * Removed auto_combine function and argument to open_mfdataset
    
    * Removed corresponding tests
    
    * Code formatting
    
    * updated what's new
    
    * PEP8 fixes
    
    * Update doc/whats-new.rst
    
    `:py:func:` links fixed
    
    Co-Authored-By: keewis 
    
    * removed auto_combine from API docs
    
    * clarify that auto_combine is completely removed
    
    * concat_dim=None by default for combine='nested'
    
    * fix black formatting
    
    Co-authored-by: keewis 
    Co-authored-by: dcherian 
    ---
     doc/api-hidden.rst            |   2 -
     doc/api.rst                   |   1 -
     doc/whats-new.rst             |   7 +
     xarray/__init__.py            |   3 +-
     xarray/backends/api.py        |  61 ++------
     xarray/core/combine.py        | 271 ----------------------------------
     xarray/tests/test_backends.py |  86 ++---------
     xarray/tests/test_combine.py  | 176 +---------------------
     8 files changed, 34 insertions(+), 573 deletions(-)
    
    diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst
    index 313428c29d2..5542e488143 100644
    --- a/doc/api-hidden.rst
    +++ b/doc/api-hidden.rst
    @@ -9,8 +9,6 @@
     .. autosummary::
        :toctree: generated/
     
    -   auto_combine
    -
        Dataset.nbytes
        Dataset.chunks
     
    diff --git a/doc/api.rst b/doc/api.rst
    index bb0edd0dfa5..603e3e8f6cf 100644
    --- a/doc/api.rst
    +++ b/doc/api.rst
    @@ -21,7 +21,6 @@ Top-level functions
        broadcast
        concat
        merge
    -   auto_combine
        combine_by_coords
        combine_nested
        where
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index bf57f5e951d..a4ec85c1950 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -33,6 +33,13 @@ Breaking changes
       `_.
       (:pull:`3274`)
       By `Elliott Sales de Andrade `_
    +- The old :py:func:`auto_combine` function has now been removed in
    +  favour of the :py:func:`combine_by_coords` and
    +  :py:func:`combine_nested` functions. This also means that
    +  the default behaviour of :py:func:`open_mfdataset` has changed to use
    +  ``combine='by_coords'`` as the default argument value. (:issue:`2616`, :pull:`3926`)
    +  By `Tom Nicholas `_.
    +
     
     Enhancements
     ~~~~~~~~~~~~
    diff --git a/xarray/__init__.py b/xarray/__init__.py
    index cb4824d188d..3886edc60e6 100644
    --- a/xarray/__init__.py
    +++ b/xarray/__init__.py
    @@ -16,7 +16,7 @@
     from .coding.frequencies import infer_freq
     from .conventions import SerializationWarning, decode_cf
     from .core.alignment import align, broadcast
    -from .core.combine import auto_combine, combine_by_coords, combine_nested
    +from .core.combine import combine_by_coords, combine_nested
     from .core.common import ALL_DIMS, full_like, ones_like, zeros_like
     from .core.computation import apply_ufunc, corr, cov, dot, polyval, where
     from .core.concat import concat
    @@ -47,7 +47,6 @@
         "align",
         "apply_ufunc",
         "as_variable",
    -    "auto_combine",
         "broadcast",
         "cftime_range",
         "combine_by_coords",
    diff --git a/xarray/backends/api.py b/xarray/backends/api.py
    index 4077d7a02c8..8d7c2230b2d 100644
    --- a/xarray/backends/api.py
    +++ b/xarray/backends/api.py
    @@ -4,7 +4,6 @@
     from io import BytesIO
     from numbers import Number
     from pathlib import Path
    -from textwrap import dedent
     from typing import (
         TYPE_CHECKING,
         Callable,
    @@ -23,7 +22,6 @@
     from ..core.combine import (
         _infer_concat_order_from_positions,
         _nested_combine,
    -    auto_combine,
         combine_by_coords,
     )
     from ..core.dataarray import DataArray
    @@ -726,14 +724,14 @@ def close(self):
     def open_mfdataset(
         paths,
         chunks=None,
    -    concat_dim="_not_supplied",
    +    concat_dim=None,
         compat="no_conflicts",
         preprocess=None,
         engine=None,
         lock=None,
         data_vars="all",
         coords="different",
    -    combine="_old_auto",
    +    combine="by_coords",
         autoclose=None,
         parallel=False,
         join="outer",
    @@ -746,9 +744,8 @@ def open_mfdataset(
         the datasets into one before returning the result, and if combine='nested' then
         ``combine_nested`` is used. The filepaths must be structured according to which
         combining function is used, the details of which are given in the documentation for
    -    ``combine_by_coords`` and ``combine_nested``. By default the old (now deprecated)
    -    ``auto_combine`` will be used, please specify either ``combine='by_coords'`` or
    -    ``combine='nested'`` in future. Requires dask to be installed. See documentation for
    +    ``combine_by_coords`` and ``combine_nested``. By default ``combine='by_coords'``
    +    will be used. Requires dask to be installed. See documentation for
         details on dask [1]_. Global attributes from the ``attrs_file`` are used
         for the combined dataset.
     
    @@ -758,7 +755,7 @@ def open_mfdataset(
             Either a string glob in the form ``"path/to/my/files/*.nc"`` or an explicit list of
             files to open. Paths can be given as strings or as pathlib Paths. If
             concatenation along more than one dimension is desired, then ``paths`` must be a
    -        nested list-of-lists (see ``manual_combine`` for details). (A string glob will
    +        nested list-of-lists (see ``combine_nested`` for details). (A string glob will
             be expanded to a 1-dimensional list.)
         chunks : int or dict, optional
             Dictionary with keys given by dimension names and values given by chunk sizes.
    @@ -768,15 +765,16 @@ def open_mfdataset(
             see the full documentation for more details [2]_.
         concat_dim : str, or list of str, DataArray, Index or None, optional
             Dimensions to concatenate files along.  You only need to provide this argument
    -        if any of the dimensions along which you want to concatenate is not a dimension
    -        in the original datasets, e.g., if you want to stack a collection of 2D arrays
    -        along a third dimension. Set ``concat_dim=[..., None, ...]`` explicitly to
    -        disable concatenation along a particular dimension.
    +        if ``combine='by_coords'``, and if any of the dimensions along which you want to
    +        concatenate is not a dimension in the original datasets, e.g., if you want to
    +        stack a collection of 2D arrays along a third dimension. Set
    +        ``concat_dim=[..., None, ...]`` explicitly to disable concatenation along a
    +        particular dimension. Default is None, which for a 1D list of filepaths is
    +        equivalent to opening the files separately and then merging them with
    +        ``xarray.merge``.
         combine : {'by_coords', 'nested'}, optional
             Whether ``xarray.combine_by_coords`` or ``xarray.combine_nested`` is used to
    -        combine all the data. If this argument is not provided, `xarray.auto_combine` is
    -        used, but in the future this behavior will switch to use
    -        `xarray.combine_by_coords` by default.
    +        combine all the data. Default is to use ``xarray.combine_by_coords``.
         compat : {'identical', 'equals', 'broadcast_equals',
                   'no_conflicts', 'override'}, optional
             String indicating how to compare variables of the same name for
    @@ -869,7 +867,6 @@ def open_mfdataset(
         --------
         combine_by_coords
         combine_nested
    -    auto_combine
         open_dataset
     
         References
    @@ -897,11 +894,8 @@ def open_mfdataset(
         # If combine='nested' then this creates a flat list which is easier to
         # iterate over, while saving the originally-supplied structure as "ids"
         if combine == "nested":
    -        if str(concat_dim) == "_not_supplied":
    -            raise ValueError("Must supply concat_dim when using " "combine='nested'")
    -        else:
    -            if isinstance(concat_dim, (str, DataArray)) or concat_dim is None:
    -                concat_dim = [concat_dim]
    +        if isinstance(concat_dim, (str, DataArray)) or concat_dim is None:
    +            concat_dim = [concat_dim]
         combined_ids_paths = _infer_concat_order_from_positions(paths)
         ids, paths = (list(combined_ids_paths.keys()), list(combined_ids_paths.values()))
     
    @@ -933,30 +927,7 @@ def open_mfdataset(
     
         # Combine all datasets, closing them in case of a ValueError
         try:
    -        if combine == "_old_auto":
    -            # Use the old auto_combine for now
    -            # Remove this after deprecation cycle from #2616 is complete
    -            basic_msg = dedent(
    -                """\
    -            In xarray version 0.15 the default behaviour of `open_mfdataset`
    -            will change. To retain the existing behavior, pass
    -            combine='nested'. To use future default behavior, pass
    -            combine='by_coords'. See
    -            http://xarray.pydata.org/en/stable/combining.html#combining-multi
    -            """
    -            )
    -            warnings.warn(basic_msg, FutureWarning, stacklevel=2)
    -
    -            combined = auto_combine(
    -                datasets,
    -                concat_dim=concat_dim,
    -                compat=compat,
    -                data_vars=data_vars,
    -                coords=coords,
    -                join=join,
    -                from_openmfds=True,
    -            )
    -        elif combine == "nested":
    +        if combine == "nested":
                 # Combined nested list by successive concat and merge operations
                 # along each dimension, using structure given by "ids"
                 combined = _nested_combine(
    diff --git a/xarray/core/combine.py b/xarray/core/combine.py
    index 1f990457798..58bd7178fa2 100644
    --- a/xarray/core/combine.py
    +++ b/xarray/core/combine.py
    @@ -1,7 +1,5 @@
     import itertools
    -import warnings
     from collections import Counter
    -from textwrap import dedent
     
     import pandas as pd
     
    @@ -762,272 +760,3 @@ def combine_by_coords(
             join=join,
             combine_attrs=combine_attrs,
         )
    -
    -
    -# Everything beyond here is only needed until the deprecation cycle in #2616
    -# is completed
    -
    -
    -_CONCAT_DIM_DEFAULT = "__infer_concat_dim__"
    -
    -
    -def auto_combine(
    -    datasets,
    -    concat_dim="_not_supplied",
    -    compat="no_conflicts",
    -    data_vars="all",
    -    coords="different",
    -    fill_value=dtypes.NA,
    -    join="outer",
    -    from_openmfds=False,
    -):
    -    """
    -    Attempt to auto-magically combine the given datasets into one.
    -
    -    This entire function is deprecated in favour of ``combine_nested`` and
    -    ``combine_by_coords``.
    -
    -    This method attempts to combine a list of datasets into a single entity by
    -    inspecting metadata and using a combination of concat and merge.
    -    It does not concatenate along more than one dimension or sort data under
    -    any circumstances. It does align coordinates, but different variables on
    -    datasets can cause it to fail under some scenarios. In complex cases, you
    -    may need to clean up your data and use ``concat``/``merge`` explicitly.
    -    ``auto_combine`` works well if you have N years of data and M data
    -    variables, and each combination of a distinct time period and set of data
    -    variables is saved its own dataset.
    -
    -    Parameters
    -    ----------
    -    datasets : sequence of xarray.Dataset
    -        Dataset objects to merge.
    -    concat_dim : str or DataArray or Index, optional
    -        Dimension along which to concatenate variables, as used by
    -        :py:func:`xarray.concat`. You only need to provide this argument if
    -        the dimension along which you want to concatenate is not a dimension
    -        in the original datasets, e.g., if you want to stack a collection of
    -        2D arrays along a third dimension.
    -        By default, xarray attempts to infer this argument by examining
    -        component files. Set ``concat_dim=None`` explicitly to disable
    -        concatenation.
    -    compat : {'identical', 'equals', 'broadcast_equals',
    -             'no_conflicts', 'override'}, optional
    -        String indicating how to compare variables of the same name for
    -        potential conflicts:
    -
    -        - 'broadcast_equals': all values must be equal when variables are
    -          broadcast against each other to ensure common dimensions.
    -        - 'equals': all values and dimensions must be the same.
    -        - 'identical': all values, dimensions and attributes must be the
    -          same.
    -        - 'no_conflicts': only values which are not null in both datasets
    -          must be equal. The returned dataset then contains the combination
    -          of all non-null values.
    -        - 'override': skip comparing and pick variable from first dataset
    -    data_vars : {'minimal', 'different', 'all' or list of str}, optional
    -        Details are in the documentation of concat
    -    coords : {'minimal', 'different', 'all' o list of str}, optional
    -        Details are in the documentation of concat
    -    fill_value : scalar, optional
    -        Value to use for newly missing values
    -    join : {'outer', 'inner', 'left', 'right', 'exact'}, optional
    -        String indicating how to combine differing indexes
    -        (excluding concat_dim) in objects
    -
    -        - 'outer': use the union of object indexes
    -        - 'inner': use the intersection of object indexes
    -        - 'left': use indexes from the first object with each dimension
    -        - 'right': use indexes from the last object with each dimension
    -        - 'exact': instead of aligning, raise `ValueError` when indexes to be
    -          aligned are not equal
    -        - 'override': if indexes are of same size, rewrite indexes to be
    -          those of the first object with that dimension. Indexes for the same
    -          dimension must have the same size in all objects.
    -
    -    Returns
    -    -------
    -    combined : xarray.Dataset
    -
    -    See also
    -    --------
    -    concat
    -    Dataset.merge
    -    """
    -
    -    if not from_openmfds:
    -        basic_msg = dedent(
    -            """\
    -        In xarray version 0.15 `auto_combine` will be deprecated. See
    -        http://xarray.pydata.org/en/stable/combining.html#combining-multi"""
    -        )
    -        warnings.warn(basic_msg, FutureWarning, stacklevel=2)
    -
    -    if concat_dim == "_not_supplied":
    -        concat_dim = _CONCAT_DIM_DEFAULT
    -        message = ""
    -    else:
    -        message = dedent(
    -            """\
    -        Also `open_mfdataset` will no longer accept a `concat_dim` argument.
    -        To get equivalent behaviour from now on please use the new
    -        `combine_nested` function instead (or the `combine='nested'` option to
    -        `open_mfdataset`)."""
    -        )
    -
    -    if _dimension_coords_exist(datasets):
    -        message += dedent(
    -            """\
    -        The datasets supplied have global dimension coordinates. You may want
    -        to use the new `combine_by_coords` function (or the
    -        `combine='by_coords'` option to `open_mfdataset`) to order the datasets
    -        before concatenation. Alternatively, to continue concatenating based
    -        on the order the datasets are supplied in future, please use the new
    -        `combine_nested` function (or the `combine='nested'` option to
    -        open_mfdataset)."""
    -        )
    -    else:
    -        message += dedent(
    -            """\
    -        The datasets supplied do not have global dimension coordinates. In
    -        future, to continue concatenating without supplying dimension
    -        coordinates, please use the new `combine_nested` function (or the
    -        `combine='nested'` option to open_mfdataset."""
    -        )
    -
    -    if _requires_concat_and_merge(datasets):
    -        manual_dims = [concat_dim].append(None)
    -        message += dedent(
    -            """\
    -        The datasets supplied require both concatenation and merging. From
    -        xarray version 0.15 this will operation will require either using the
    -        new `combine_nested` function (or the `combine='nested'` option to
    -        open_mfdataset), with a nested list structure such that you can combine
    -        along the dimensions {}. Alternatively if your datasets have global
    -        dimension coordinates then you can use the new `combine_by_coords`
    -        function.""".format(
    -                manual_dims
    -            )
    -        )
    -
    -    warnings.warn(message, FutureWarning, stacklevel=2)
    -
    -    return _old_auto_combine(
    -        datasets,
    -        concat_dim=concat_dim,
    -        compat=compat,
    -        data_vars=data_vars,
    -        coords=coords,
    -        fill_value=fill_value,
    -        join=join,
    -    )
    -
    -
    -def _dimension_coords_exist(datasets):
    -    """
    -    Check if the datasets have consistent global dimension coordinates
    -    which would in future be used by `auto_combine` for concatenation ordering.
    -    """
    -
    -    # Group by data vars
    -    sorted_datasets = sorted(datasets, key=vars_as_keys)
    -    grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
    -
    -    # Simulates performing the multidimensional combine on each group of data
    -    # variables before merging back together
    -    try:
    -        for vars, datasets_with_same_vars in grouped_by_vars:
    -            _infer_concat_order_from_coords(list(datasets_with_same_vars))
    -        return True
    -    except ValueError:
    -        # ValueError means datasets don't have global dimension coordinates
    -        # Or something else went wrong in trying to determine them
    -        return False
    -
    -
    -def _requires_concat_and_merge(datasets):
    -    """
    -    Check if the datasets require the use of both xarray.concat and
    -    xarray.merge, which in future might require the user to use
    -    `manual_combine` instead.
    -    """
    -    # Group by data vars
    -    sorted_datasets = sorted(datasets, key=vars_as_keys)
    -    grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys)
    -
    -    return len(list(grouped_by_vars)) > 1
    -
    -
    -def _old_auto_combine(
    -    datasets,
    -    concat_dim=_CONCAT_DIM_DEFAULT,
    -    compat="no_conflicts",
    -    data_vars="all",
    -    coords="different",
    -    fill_value=dtypes.NA,
    -    join="outer",
    -):
    -    if concat_dim is not None:
    -        dim = None if concat_dim is _CONCAT_DIM_DEFAULT else concat_dim
    -
    -        sorted_datasets = sorted(datasets, key=vars_as_keys)
    -        grouped = itertools.groupby(sorted_datasets, key=vars_as_keys)
    -
    -        concatenated = [
    -            _auto_concat(
    -                list(datasets),
    -                dim=dim,
    -                data_vars=data_vars,
    -                coords=coords,
    -                compat=compat,
    -                fill_value=fill_value,
    -                join=join,
    -            )
    -            for vars, datasets in grouped
    -        ]
    -    else:
    -        concatenated = datasets
    -    merged = merge(concatenated, compat=compat, fill_value=fill_value, join=join)
    -    return merged
    -
    -
    -def _auto_concat(
    -    datasets,
    -    dim=None,
    -    data_vars="all",
    -    coords="different",
    -    fill_value=dtypes.NA,
    -    join="outer",
    -    compat="no_conflicts",
    -):
    -    if len(datasets) == 1 and dim is None:
    -        # There is nothing more to combine, so kick out early.
    -        return datasets[0]
    -    else:
    -        if dim is None:
    -            ds0 = datasets[0]
    -            ds1 = datasets[1]
    -            concat_dims = set(ds0.dims)
    -            if ds0.dims != ds1.dims:
    -                dim_tuples = set(ds0.dims.items()) - set(ds1.dims.items())
    -                concat_dims = {i for i, _ in dim_tuples}
    -            if len(concat_dims) > 1:
    -                concat_dims = {d for d in concat_dims if not ds0[d].equals(ds1[d])}
    -            if len(concat_dims) > 1:
    -                raise ValueError(
    -                    "too many different dimensions to " "concatenate: %s" % concat_dims
    -                )
    -            elif len(concat_dims) == 0:
    -                raise ValueError(
    -                    "cannot infer dimension to concatenate: "
    -                    "supply the ``concat_dim`` argument "
    -                    "explicitly"
    -                )
    -            (dim,) = concat_dims
    -        return concat(
    -            datasets,
    -            dim=dim,
    -            data_vars=data_vars,
    -            coords=coords,
    -            fill_value=fill_value,
    -            compat=compat,
    -        )
    diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
    index 1e33eccb83e..6a840e6303e 100644
    --- a/xarray/tests/test_backends.py
    +++ b/xarray/tests/test_backends.py
    @@ -2977,16 +2977,6 @@ def test_open_mfdataset_auto_combine(self):
                     with open_mfdataset([tmp2, tmp1], combine="by_coords") as actual:
                         assert_identical(original, actual)
     
    -    def test_open_mfdataset_combine_nested_no_concat_dim(self):
    -        original = Dataset({"foo": ("x", np.random.randn(10)), "x": np.arange(10)})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                original.isel(x=slice(5)).to_netcdf(tmp1)
    -                original.isel(x=slice(5, 10)).to_netcdf(tmp2)
    -
    -                with raises_regex(ValueError, "Must supply concat_dim"):
    -                    open_mfdataset([tmp2, tmp1], combine="nested")
    -
         @pytest.mark.xfail(reason="mfdataset loses encoding currently.")
         def test_encoding_mfdataset(self):
             original = Dataset(
    @@ -3080,6 +3070,15 @@ def test_open_mfdataset_concat_dim_none(self):
                     ) as actual:
                         assert_identical(data, actual)
     
    +    def test_open_mfdataset_concat_dim_default_none(self):
    +        with create_tmp_file() as tmp1:
    +            with create_tmp_file() as tmp2:
    +                data = Dataset({"x": 0})
    +                data.to_netcdf(tmp1)
    +                Dataset({"x": np.nan}).to_netcdf(tmp2)
    +                with open_mfdataset([tmp1, tmp2], combine="nested") as actual:
    +                    assert_identical(data, actual)
    +
         def test_open_dataset(self):
             original = Dataset({"foo": ("x", np.random.randn(10))})
             with create_tmp_file() as tmp:
    @@ -3203,73 +3202,6 @@ def test_load_dataarray(self):
                 ds.to_netcdf(tmp)
     
     
    -@requires_scipy_or_netCDF4
    -@requires_dask
    -class TestOpenMFDataSetDeprecation:
    -    """
    -    Set of tests to check that FutureWarnings are correctly raised until the
    -    deprecation cycle is complete. #2616
    -    """
    -
    -    def test_open_mfdataset_default(self):
    -        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                ds1.to_netcdf(tmp1)
    -                ds2.to_netcdf(tmp2)
    -
    -                with pytest.warns(
    -                    FutureWarning, match="default behaviour of" " `open_mfdataset`"
    -                ):
    -                    open_mfdataset([tmp1, tmp2])
    -
    -    def test_open_mfdataset_with_concat_dim(self):
    -        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                ds1.to_netcdf(tmp1)
    -                ds2.to_netcdf(tmp2)
    -
    -                with pytest.warns(FutureWarning, match="`concat_dim`"):
    -                    open_mfdataset([tmp1, tmp2], concat_dim="x")
    -
    -    def test_auto_combine_with_merge_and_concat(self):
    -        ds1, ds2 = Dataset({"x": [0]}), Dataset({"x": [1]})
    -        ds3 = Dataset({"z": ((), 99)})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                with create_tmp_file() as tmp3:
    -                    ds1.to_netcdf(tmp1)
    -                    ds2.to_netcdf(tmp2)
    -                    ds3.to_netcdf(tmp3)
    -
    -                    with pytest.warns(
    -                        FutureWarning, match="require both concatenation"
    -                    ):
    -                        open_mfdataset([tmp1, tmp2, tmp3])
    -
    -    def test_auto_combine_with_coords(self):
    -        ds1 = Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])})
    -        ds2 = Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                ds1.to_netcdf(tmp1)
    -                ds2.to_netcdf(tmp2)
    -
    -                with pytest.warns(FutureWarning, match="supplied have global"):
    -                    open_mfdataset([tmp1, tmp2])
    -
    -    def test_auto_combine_without_coords(self):
    -        ds1, ds2 = Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})
    -        with create_tmp_file() as tmp1:
    -            with create_tmp_file() as tmp2:
    -                ds1.to_netcdf(tmp1)
    -                ds2.to_netcdf(tmp2)
    -
    -                with pytest.warns(FutureWarning, match="supplied do not have global"):
    -                    open_mfdataset([tmp1, tmp2])
    -
    -
     @requires_scipy_or_netCDF4
     @requires_pydap
     @pytest.mark.filterwarnings("ignore:The binary mode of fromstring is deprecated")
    diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
    index c3f981f10d1..59f61f59722 100644
    --- a/xarray/tests/test_combine.py
    +++ b/xarray/tests/test_combine.py
    @@ -4,14 +4,7 @@
     import numpy as np
     import pytest
     
    -from xarray import (
    -    DataArray,
    -    Dataset,
    -    auto_combine,
    -    combine_by_coords,
    -    combine_nested,
    -    concat,
    -)
    +from xarray import DataArray, Dataset, combine_by_coords, combine_nested, concat
     from xarray.core import dtypes
     from xarray.core.combine import (
         _check_shape_tile_ids,
    @@ -818,173 +811,6 @@ def test_combine_by_coords_incomplete_hypercube(self):
                 combine_by_coords([x1, x2, x3], fill_value=None)
     
     
    -@pytest.mark.filterwarnings(
    -    "ignore:In xarray version 0.15 `auto_combine` " "will be deprecated"
    -)
    -@pytest.mark.filterwarnings("ignore:Also `open_mfdataset` will no longer")
    -@pytest.mark.filterwarnings("ignore:The datasets supplied")
    -class TestAutoCombineOldAPI:
    -    """
    -    Set of tests which check that old 1-dimensional auto_combine behaviour is
    -    still satisfied. #2616
    -    """
    -
    -    def test_auto_combine(self):
    -        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": [0, 1]})
    -        assert_identical(expected, actual)
    -
    -        actual = auto_combine([actual])
    -        assert_identical(expected, actual)
    -
    -        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": [0, 1, 2]})
    -        assert_identical(expected, actual)
    -
    -        # ensure auto_combine handles non-sorted variables
    -        objs = [
    -            Dataset({"x": ("a", [0]), "y": ("a", [0])}),
    -            Dataset({"y": ("a", [1]), "x": ("a", [1])}),
    -        ]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
    -        assert_identical(expected, actual)
    -
    -        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
    -        with raises_regex(ValueError, "too many .* dimensions"):
    -            auto_combine(objs)
    -
    -        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
    -        with raises_regex(ValueError, "cannot infer dimension"):
    -            auto_combine(objs)
    -
    -        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
    -        with raises_regex(ValueError, "'y' is not present in all datasets"):
    -            auto_combine(objs)
    -
    -    def test_auto_combine_previously_failed(self):
    -        # In the above scenario, one file is missing, containing the data for
    -        # one year's data for one variable.
    -        datasets = [
    -            Dataset({"a": ("x", [0]), "x": [0]}),
    -            Dataset({"b": ("x", [0]), "x": [0]}),
    -            Dataset({"a": ("x", [1]), "x": [1]}),
    -        ]
    -        expected = Dataset({"a": ("x", [0, 1]), "b": ("x", [0, np.nan])}, {"x": [0, 1]})
    -        actual = auto_combine(datasets)
    -        assert_identical(expected, actual)
    -
    -        # Your data includes "time" and "station" dimensions, and each year's
    -        # data has a different set of stations.
    -        datasets = [
    -            Dataset({"a": ("x", [2, 3]), "x": [1, 2]}),
    -            Dataset({"a": ("x", [1, 2]), "x": [0, 1]}),
    -        ]
    -        expected = Dataset(
    -            {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])}, {"x": [0, 1, 2]}
    -        )
    -        actual = auto_combine(datasets, concat_dim="t")
    -        assert_identical(expected, actual)
    -
    -    def test_auto_combine_with_new_variables(self):
    -        datasets = [Dataset({"x": 0}, {"y": 0}), Dataset({"x": 1}, {"y": 1, "z": 1})]
    -        actual = auto_combine(datasets, "y")
    -        expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1})
    -        assert_identical(expected, actual)
    -
    -    def test_auto_combine_no_concat(self):
    -        objs = [Dataset({"x": 0}), Dataset({"y": 1})]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": 0, "y": 1})
    -        assert_identical(expected, actual)
    -
    -        objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"x": 0, "y": 1, "z": 2})
    -        assert_identical(expected, actual)
    -
    -        data = Dataset({"x": 0})
    -        actual = auto_combine([data, data, data], concat_dim=None)
    -        assert_identical(data, actual)
    -
    -        # Single object, with a concat_dim explicitly provided
    -        # Test the issue reported in GH #1988
    -        objs = [Dataset({"x": 0, "y": 1})]
    -        dim = DataArray([100], name="baz", dims="baz")
    -        actual = auto_combine(objs, concat_dim=dim)
    -        expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]})
    -        assert_identical(expected, actual)
    -
    -        # Just making sure that auto_combine is doing what is
    -        # expected for non-scalar values, too.
    -        objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})]
    -        dim = DataArray([100], name="baz", dims="baz")
    -        actual = auto_combine(objs, concat_dim=dim)
    -        expected = Dataset(
    -            {"x": (("baz", "z"), [[0, 1]]), "y": (("baz", "z"), [[1, 2]])},
    -            {"baz": [100]},
    -        )
    -        assert_identical(expected, actual)
    -
    -    def test_auto_combine_order_by_appearance_not_coords(self):
    -        objs = [
    -            Dataset({"foo": ("x", [0])}, coords={"x": ("x", [1])}),
    -            Dataset({"foo": ("x", [1])}, coords={"x": ("x", [0])}),
    -        ]
    -        actual = auto_combine(objs)
    -        expected = Dataset({"foo": ("x", [0, 1])}, coords={"x": ("x", [1, 0])})
    -        assert_identical(expected, actual)
    -
    -    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0])
    -    def test_auto_combine_fill_value(self, fill_value):
    -        datasets = [
    -            Dataset({"a": ("x", [2, 3]), "x": [1, 2]}),
    -            Dataset({"a": ("x", [1, 2]), "x": [0, 1]}),
    -        ]
    -        if fill_value == dtypes.NA:
    -            # if we supply the default, we expect the missing value for a
    -            # float array
    -            fill_value = np.nan
    -        expected = Dataset(
    -            {"a": (("t", "x"), [[fill_value, 2, 3], [1, 2, fill_value]])},
    -            {"x": [0, 1, 2]},
    -        )
    -        actual = auto_combine(datasets, concat_dim="t", fill_value=fill_value)
    -        assert_identical(expected, actual)
    -
    -
    -class TestAutoCombineDeprecation:
    -    """
    -    Set of tests to check that FutureWarnings are correctly raised until the
    -    deprecation cycle is complete. #2616
    -    """
    -
    -    def test_auto_combine_with_concat_dim(self):
    -        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
    -        with pytest.warns(FutureWarning, match="`concat_dim`"):
    -            auto_combine(objs, concat_dim="x")
    -
    -    def test_auto_combine_with_merge_and_concat(self):
    -        objs = [Dataset({"x": [0]}), Dataset({"x": [1]}), Dataset({"z": ((), 99)})]
    -        with pytest.warns(FutureWarning, match="require both concatenation"):
    -            auto_combine(objs)
    -
    -    def test_auto_combine_with_coords(self):
    -        objs = [
    -            Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])}),
    -            Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])}),
    -        ]
    -        with pytest.warns(FutureWarning, match="supplied have global"):
    -            auto_combine(objs)
    -
    -    def test_auto_combine_without_coords(self):
    -        objs = [Dataset({"foo": ("x", [0])}), Dataset({"foo": ("x", [1])})]
    -        with pytest.warns(FutureWarning, match="supplied do not have global"):
    -            auto_combine(objs)
    -
    -
     @requires_cftime
     def test_combine_by_coords_distant_cftime_dates():
         # Regression test for https://github.com/pydata/xarray/issues/3535
    
    From 5121d867a50af328353153a3bbc7656c154a602f Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Wed, 24 Jun 2020 20:24:54 +0200
    Subject: [PATCH 63/71] use assert_allclose in the aggregation-with-units tests
     (#4174)
    
    * use assert_allclose in the aggregation tests
    
    * install pint using pip
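
     For illustration (not part of this patch; the values below are made up), why the
     looser comparison is needed: assert_allclose tolerates the tiny floating-point
     differences that unit-aware aggregations can introduce, where assert_identical
     would fail on an exact comparison.

         import numpy as np
         import xarray as xr

         a = xr.DataArray(np.array([1.0, 2.0, 3.0]), dims="x")
         b = a + 1e-12  # tiny round-off, e.g. from a different reduction order

         # assert_identical compares values exactly and would fail here, while
         # assert_allclose accepts differences within its default tolerance.
         xr.testing.assert_allclose(a, b)
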
    ---
     ci/requirements/py36-min-nep18.yml    | 3 ++-
     ci/requirements/py36.yml              | 2 +-
     ci/requirements/py37-windows.yml      | 2 +-
     ci/requirements/py37.yml              | 2 +-
     ci/requirements/py38-all-but-dask.yml | 2 +-
     ci/requirements/py38.yml              | 2 +-
     xarray/tests/test_units.py            | 6 +++---
     7 files changed, 10 insertions(+), 9 deletions(-)
    
    diff --git a/ci/requirements/py36-min-nep18.yml b/ci/requirements/py36-min-nep18.yml
    index 48b9c057260..cd2b1a18c77 100644
    --- a/ci/requirements/py36-min-nep18.yml
    +++ b/ci/requirements/py36-min-nep18.yml
    @@ -11,7 +11,6 @@ dependencies:
       - msgpack-python=0.6  # remove once distributed is bumped. distributed GH3491
       - numpy=1.17
       - pandas=0.25
    -  - pint
       - pip
       - pytest
       - pytest-cov
    @@ -19,3 +18,5 @@ dependencies:
       - scipy=1.2
       - setuptools=41.2
       - sparse=0.8
    +  - pip:
    +      - pint==0.13
    diff --git a/ci/requirements/py36.yml b/ci/requirements/py36.yml
    index a500173f277..aa2baf9dcce 100644
    --- a/ci/requirements/py36.yml
    +++ b/ci/requirements/py36.yml
    @@ -28,7 +28,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -45,3 +44,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/ci/requirements/py37-windows.yml b/ci/requirements/py37-windows.yml
    index e9e5c7a900a..8b12704d644 100644
    --- a/ci/requirements/py37-windows.yml
    +++ b/ci/requirements/py37-windows.yml
    @@ -28,7 +28,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -45,3 +44,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/ci/requirements/py37.yml b/ci/requirements/py37.yml
    index dba3926596e..70c453e8776 100644
    --- a/ci/requirements/py37.yml
    +++ b/ci/requirements/py37.yml
    @@ -28,7 +28,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -45,3 +44,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/ci/requirements/py38-all-but-dask.yml b/ci/requirements/py38-all-but-dask.yml
    index a375d9e1e5a..6d76eecbd6a 100644
    --- a/ci/requirements/py38-all-but-dask.yml
    +++ b/ci/requirements/py38-all-but-dask.yml
    @@ -25,7 +25,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -42,3 +41,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/ci/requirements/py38.yml b/ci/requirements/py38.yml
    index 7dff3a1bd97..6f35138978c 100644
    --- a/ci/requirements/py38.yml
    +++ b/ci/requirements/py38.yml
    @@ -28,7 +28,6 @@ dependencies:
       - numba
       - numpy
       - pandas
    -  - pint
       - pip
       - pseudonetcdf
       - pydap
    @@ -45,3 +44,4 @@ dependencies:
       - zarr
       - pip:
         - numbagg
    +    - pint
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index b477e8cccb2..fb9063ca49e 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -1438,7 +1438,7 @@ def test_aggregation(self, func, dtype):
             actual = func(variable)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_identical(expected, actual)
    +        assert_allclose(expected, actual)
     
         # TODO: remove once pint==0.12 has been released
         @pytest.mark.xfail(
    @@ -2296,7 +2296,7 @@ def test_aggregation(self, func, dtype):
             actual = func(data_array)
     
             assert_units_equal(expected, actual)
    -        xr.testing.assert_allclose(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize(
             "func",
    @@ -3861,7 +3861,7 @@ def test_aggregation(self, func, dtype):
             expected = attach_units(func(strip_units(ds)), units)
     
             assert_units_equal(expected, actual)
    -        assert_equal(expected, actual)
    +        assert_allclose(expected, actual)
     
         @pytest.mark.parametrize("property", ("imag", "real"))
         def test_numpy_properties(self, property, dtype):
    
    From f4638afe009fde5f53de1a1b80cc71f62593c463 Mon Sep 17 00:00:00 2001
    From: Pascal Bourgault 
    Date: Wed, 24 Jun 2020 23:59:51 -0400
    Subject: [PATCH 64/71] Correct dask handling for 1D idxmax/min on ND data
     (#4135)
    
    * Correct dask handling for 1D idxmax/min on ND data
    
    * Passing black and others
    
    * Edit Whats New
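
     For illustration (not from this patch; the names and sizes below are made up),
     the indexing pattern the fix relies on: flatten the N-dimensional integer index,
     take from the 1-D dask-backed coordinate, then restore the original shape.

         import dask.array as da
         import numpy as np

         coord = da.from_array(np.arange(10), chunks=5)  # 1-D coordinate values
         # an N-D array of integer positions, like the output of argmin/argmax
         idx = da.from_array(np.random.randint(0, 10, size=(4, 6)), chunks=(2, 3))

         # 1-D fancy indexing is well supported by dask, so flatten the index,
         # take the coordinate values, then reshape back to the index's shape.
         picked = coord[idx.ravel()].reshape(idx.shape)
         print(picked.compute().shape)  # (4, 6)
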
    ---
     doc/whats-new.rst              |  4 ++--
     xarray/core/computation.py     |  2 +-
     xarray/tests/test_dataarray.py | 19 +++++++++++++++++++
     3 files changed, 22 insertions(+), 3 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index a4ec85c1950..d82be79270e 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -80,8 +80,8 @@ New Features
       :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:issue:`60`, :pull:`3871`)
       By `Todd Jennings `_
     - Support dask handling for :py:meth:`DataArray.idxmax`, :py:meth:`DataArray.idxmin`,
    -  :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:pull:`3922`)
    -  By `Kai Mühlbauer `_.
    +  :py:meth:`Dataset.idxmax`, :py:meth:`Dataset.idxmin`.  (:pull:`3922`, :pull:`4135`)
    +  By `Kai Mühlbauer `_ and `Pascal Bourgault `_.
     - More support for unit aware arrays with pint (:pull:`3643`, :pull:`3975`)
       By `Justus Magin `_.
     - Support overriding existing variables in ``to_zarr()`` with ``mode='a'`` even
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index cecd4fd8e70..4f4fd475c82 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -1563,7 +1563,7 @@ def _calc_idxminmax(
     
             chunks = dict(zip(array.dims, array.chunks))
             dask_coord = dask.array.from_array(array[dim].data, chunks=chunks[dim])
    -        res = indx.copy(data=dask_coord[(indx.data,)])
    +        res = indx.copy(data=dask_coord[indx.data.ravel()].reshape(indx.shape))
             # we need to attach back the dim name
             res.name = dim
         else:
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index 8fc37ac458d..d942667a4c7 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -5257,6 +5257,25 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask):
             assert_identical(result7, expected7)
     
     
    +class TestReduceND(TestReduce):
    +    @pytest.mark.parametrize("op", ["idxmin", "idxmax"])
    +    @pytest.mark.parametrize("ndim", [3, 5])
    +    def test_idxminmax_dask(self, op, ndim):
    +        if not has_dask:
    +            pytest.skip("requires dask")
    +
    +        ar0_raw = xr.DataArray(
    +            np.random.random_sample(size=[10] * ndim),
    +            dims=[i for i in "abcdefghij"[: ndim - 1]] + ["x"],
    +            coords={"x": np.arange(10)},
    +            attrs=self.attrs,
    +        )
    +
    +        ar0_dsk = ar0_raw.chunk({})
    +        # Assert idx is the same with dask and without
    +        assert_equal(getattr(ar0_dsk, op)(dim="x"), getattr(ar0_raw, op)(dim="x"))
    +
    +
     @pytest.fixture(params=[1])
     def da(request):
         if request.param == 1:
    
    From 65ca92a5c0a4143d00dd7a822bcb1d49738717f1 Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Wed, 24 Jun 2020 23:20:56 -0700
    Subject: [PATCH 65/71] Add CONTRIBUTING.md for the benefit of GitHub
    
    ---
     CONTRIBUTING.md | 1 +
     1 file changed, 1 insertion(+)
     create mode 100644 CONTRIBUTING.md
    
    diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
    new file mode 100644
    index 00000000000..7a909aefd08
    --- /dev/null
    +++ b/CONTRIBUTING.md
    @@ -0,0 +1 @@
    +Xarray's contributor guidelines [can be found in our online documentation](http://xarray.pydata.org/en/stable/contributing.html)
    
    From 732750a06aef2025b206ba6ff765f5acc53bfa25 Mon Sep 17 00:00:00 2001
    From: keewis 
    Date: Sat, 27 Jun 2020 10:31:11 +0200
    Subject: [PATCH 66/71] Blackdoc (#4177)
    
    * add blackdoc to the pre-commit configuration
    
    * use the stable version of blackdoc
    
    * run blackdoc on all files
    
    * add blackdoc to the linter / formatting tools section
    
    * use language names to enable syntax highlighting
    
    * update whats-new.rst
    ---
     .pre-commit-config.yaml    |  4 +++
     doc/contributing.rst       | 21 ++++++++++-----
     doc/dask.rst               |  5 +++-
     doc/internals.rst          |  9 ++++---
     doc/plotting.rst           |  4 +--
     doc/whats-new.rst          |  3 +++
     xarray/core/computation.py | 52 +++++++++++++++++++++++++-------------
     xarray/core/parallel.py    |  5 +++-
     8 files changed, 71 insertions(+), 32 deletions(-)
    
    diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
    index 1d384e58a3c..447f0007fc2 100644
    --- a/.pre-commit-config.yaml
    +++ b/.pre-commit-config.yaml
    @@ -11,6 +11,10 @@ repos:
         rev: stable
         hooks:
           - id: black
    +  - repo: https://github.com/keewis/blackdoc
    +    rev: stable
    +    hooks:
    +      - id: blackdoc
       - repo: https://gitlab.com/pycqa/flake8
         rev: 3.7.9
         hooks:
    diff --git a/doc/contributing.rst b/doc/contributing.rst
    index 51dba2bb0cc..9e6a3c250e9 100644
    --- a/doc/contributing.rst
    +++ b/doc/contributing.rst
    @@ -148,7 +148,7 @@ We'll now kick off a two-step process:
     1. Install the build dependencies
     2. Build and install xarray
     
    -.. code-block:: none
    +.. code-block:: sh
     
        # Create and activate the build environment
        # This is for Linux and MacOS. On Windows, use py37-windows.yml instead.
    @@ -162,7 +162,10 @@ We'll now kick off a two-step process:
        # Build and install xarray
        pip install -e .
     
    -At this point you should be able to import *xarray* from your locally built version::
    +At this point you should be able to import *xarray* from your locally
    +built version:
    +
    +.. code-block:: sh
     
        $ python  # start an interpreter
        >>> import xarray
    @@ -256,7 +259,9 @@ Some other important things to know about the docs:
     - The tutorials make heavy use of the `ipython directive
       `_ sphinx extension.
       This directive lets you put code in the documentation which will be run
    -  during the doc build. For example::
    +  during the doc build. For example:
    +
    +  .. code:: rst
     
           .. ipython:: python
     
    @@ -290,7 +295,7 @@ Requirements
     Make sure to follow the instructions on :ref:`creating a development environment above `, but
     to build the docs you need to use the environment file ``ci/requirements/doc.yml``.
     
    -.. code-block:: none
    +.. code-block:: sh
     
         # Create and activate the docs environment
         conda env create -f ci/requirements/doc.yml
    @@ -347,7 +352,10 @@ Code Formatting
     
     xarray uses several tools to ensure a consistent code format throughout the project:
     
    -- `Black `_ for standardized code formatting
    +- `Black `_ for standardized
    +  code formatting
    +- `blackdoc `_ for
    +  standardized code formatting in documentation
     - `Flake8 `_ for general code quality
     - `isort `_ for standardized order in imports.
       See also `flake8-isort `_.
    @@ -356,12 +364,13 @@ xarray uses several tools to ensure a consistent code format throughout the proj
     
     ``pip``::
     
    -   pip install black flake8 isort mypy
    +   pip install black flake8 isort mypy blackdoc
     
     and then run from the root of the Xarray repository::
     
        isort -rc .
        black -t py36 .
    +   blackdoc -t py36 .
        flake8
        mypy .
     
    diff --git a/doc/dask.rst b/doc/dask.rst
    index df223982ba4..de25ee2200e 100644
    --- a/doc/dask.rst
    +++ b/doc/dask.rst
    @@ -432,6 +432,7 @@ received by the applied function.
             print(da.sizes)
             return da.time
     
    +
         mapped = xr.map_blocks(func, ds.temperature)
         mapped
     
    @@ -461,9 +462,10 @@ Here is a common example where automated inference will not work.
         :okexcept:
     
         def func(da):
    -	print(da.sizes)
    +        print(da.sizes)
             return da.isel(time=[1])
     
    +
         mapped = xr.map_blocks(func, ds.temperature)
     
     ``func`` cannot be run on 0-shaped inputs because it is not possible to extract element 1 along a
    @@ -501,6 +503,7 @@ Notice that the 0-shaped sizes were not printed to screen. Since ``template`` ha
         def func(obj, a, b=0):
             return obj + a + b
     
    +
         mapped = ds.map_blocks(func, args=[10], kwargs={"b": 10})
         expected = ds + 10 + 10
         mapped.identical(expected)
    diff --git a/doc/internals.rst b/doc/internals.rst
    index 27c7c4e1d87..46c117e312b 100644
    --- a/doc/internals.rst
    +++ b/doc/internals.rst
    @@ -182,9 +182,10 @@ re-open it directly with Zarr:
     
     .. ipython:: python
     
    -    ds = xr.tutorial.load_dataset('rasm')
    -    ds.to_zarr('rasm.zarr', mode='w')
    +    ds = xr.tutorial.load_dataset("rasm")
    +    ds.to_zarr("rasm.zarr", mode="w")
         import zarr
    -    zgroup = zarr.open('rasm.zarr')
    +
    +    zgroup = zarr.open("rasm.zarr")
         print(zgroup.tree())
    -    dict(zgroup['Tair'].attrs)
    +    dict(zgroup["Tair"].attrs)
    \ No newline at end of file
    diff --git a/doc/plotting.rst b/doc/plotting.rst
    index f98f47f2567..72248e31b1e 100644
    --- a/doc/plotting.rst
    +++ b/doc/plotting.rst
    @@ -220,7 +220,7 @@ from the time and assign it as a non-dimension coordinate:
     
     .. ipython:: python
     
    -    decimal_day = (air1d.time - air1d.time[0]) /  pd.Timedelta('1d')
    +    decimal_day = (air1d.time - air1d.time[0]) / pd.Timedelta("1d")
         air1d_multi = air1d.assign_coords(decimal_day=("time", decimal_day))
         air1d_multi
     
    @@ -911,4 +911,4 @@ One can also make line plots with multidimensional coordinates. In this case, ``
         f, ax = plt.subplots(2, 1)
         da.plot.line(x="lon", hue="y", ax=ax[0])
         @savefig plotting_example_2d_hue_xy.png
    -    da.plot.line(x="lon", hue="x", ax=ax[1])
    +    da.plot.line(x="lon", hue="x", ax=ax[1])
    \ No newline at end of file
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index d82be79270e..27d369dd6f7 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -197,6 +197,9 @@ Internal Changes
     - Run the ``isort`` pre-commit hook only on python source files
       and update the ``flake8`` version. (:issue:`3750`, :pull:`3711`)
       By `Justus Magin `_.
    +- Add `blackdoc `_ to the list of
    +  checkers for development. (:pull:`4177`)
    +  By `Justus Magin `_.
     - Add a CI job that runs the tests with every optional dependency
       except ``dask``. (:issue:`3794`, :pull:`3919`)
       By `Justus Magin `_.
    diff --git a/xarray/core/computation.py b/xarray/core/computation.py
    index 4f4fd475c82..d8a0c53e817 100644
    --- a/xarray/core/computation.py
    +++ b/xarray/core/computation.py
    @@ -1096,10 +1096,14 @@ def cov(da_a, da_b, dim=None, ddof=1):
     
         Examples
         --------
    -    >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    -    ...                  dims=("space", "time"),
    -    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    -    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_a = DataArray(
    +    ...     np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    +    ...     dims=("space", "time"),
    +    ...     coords=[
    +    ...         ("space", ["IA", "IL", "IN"]),
    +    ...         ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    +    ...     ],
    +    ... )
         >>> da_a
         
         array([[1. , 2. , 3. ],
    @@ -1108,10 +1112,14 @@ def cov(da_a, da_b, dim=None, ddof=1):
         Coordinates:
           * space    (space) >> da_b = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    -    ...                  dims=("space", "time"),
    -    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    -    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_b = DataArray(
    +    ...     np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    ...     dims=("space", "time"),
    +    ...     coords=[
    +    ...         ("space", ["IA", "IL", "IN"]),
    +    ...         ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    +    ...     ],
    +    ... )
         >>> da_b
         
         array([[ 0.2,  0.4,  0.6],
    @@ -1123,7 +1131,7 @@ def cov(da_a, da_b, dim=None, ddof=1):
         >>> xr.cov(da_a, da_b)
         
         array(-3.53055556)
    -    >>> xr.cov(da_a, da_b, dim='time')
    +    >>> xr.cov(da_a, da_b, dim="time")
         
         array([ 0.2, -0.5,  1.69333333])
         Coordinates:
    @@ -1165,10 +1173,14 @@ def corr(da_a, da_b, dim=None):
     
         Examples
         --------
    -    >>> da_a = DataArray(np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    -    ...                  dims=("space", "time"),
    -    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    -    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_a = DataArray(
    +    ...     np.array([[1, 2, 3], [0.1, 0.2, 0.3], [3.2, 0.6, 1.8]]),
    +    ...     dims=("space", "time"),
    +    ...     coords=[
    +    ...         ("space", ["IA", "IL", "IN"]),
    +    ...         ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    +    ...     ],
    +    ... )
         >>> da_a
         
         array([[1. , 2. , 3. ],
    @@ -1177,10 +1189,14 @@ def corr(da_a, da_b, dim=None):
         Coordinates:
           * space    (space) >> da_b = DataArray(np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    -    ...                  dims=("space", "time"),
    -    ...                  coords=[('space', ['IA', 'IL', 'IN']),
    -    ...                          ('time', pd.date_range("2000-01-01", freq="1D", periods=3))])
    +    >>> da_b = DataArray(
    +    ...     np.array([[0.2, 0.4, 0.6], [15, 10, 5], [3.2, 0.6, 1.8]]),
    +    ...     dims=("space", "time"),
    +    ...     coords=[
    +    ...         ("space", ["IA", "IL", "IN"]),
    +    ...         ("time", pd.date_range("2000-01-01", freq="1D", periods=3)),
    +    ...     ],
    +    ... )
         >>> da_b
         
         array([[ 0.2,  0.4,  0.6],
    @@ -1192,7 +1208,7 @@ def corr(da_a, da_b, dim=None):
         >>> xr.corr(da_a, da_b)
         
         array(-0.57087777)
    -    >>> xr.corr(da_a, da_b, dim='time')
    +    >>> xr.corr(da_a, da_b, dim="time")
         
         array([ 1., -1.,  1.])
         Coordinates:
    diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py
    index 3a77753d0d1..86044e72dd2 100644
    --- a/xarray/core/parallel.py
    +++ b/xarray/core/parallel.py
    @@ -252,7 +252,10 @@ def map_blocks(
         to the function being applied in ``xr.map_blocks()``:
     
         >>> xr.map_blocks(
    -    ...     calculate_anomaly, array, kwargs={"groupby_type": "time.year"}, template=array,
    +    ...     calculate_anomaly,
    +    ...     array,
    +    ...     kwargs={"groupby_type": "time.year"},
    +    ...     template=array,
         ... )
         
         array([ 0.15361741, -0.25671244, -0.31600032,  0.008463  ,  0.1766172 ,
    
    From a64cf2d5476e7bbda099b34c40b7be1880dbd39a Mon Sep 17 00:00:00 2001
    From: Stephan Hoyer 
    Date: Sun, 28 Jun 2020 10:03:39 -0700
    Subject: [PATCH 67/71] Show data by default in HTML repr for DataArray (#4182)
    
    * Show data by default in HTML repr for DataArray
    
    Fixes GH-4176
    
    * add whats new for html repr
    
    * fix test
    ---
     doc/whats-new.rst                    | 4 +++-
     xarray/core/formatting_html.py       | 2 +-
     xarray/tests/test_formatting_html.py | 4 ++--
     3 files changed, 6 insertions(+), 4 deletions(-)
    
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index 27d369dd6f7..c1440ec1108 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -39,7 +39,9 @@ Breaking changes
       the default behaviour of :py:func:`open_mfdataset` has changed to use
       ``combine='by_coords'`` as the default argument value. (:issue:`2616`, :pull:`3926`)
       By `Tom Nicholas `_.
    -
    +- The ``DataArray`` and ``Variable`` HTML reprs now expand the data section by
    +  default (:issue:`4176`)
    +  By `Stephan Hoyer `_.
     
     Enhancements
     ~~~~~~~~~~~~
    diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py
    index c99683e91c7..400ef61502e 100644
    --- a/xarray/core/formatting_html.py
    +++ b/xarray/core/formatting_html.py
    @@ -184,7 +184,7 @@ def dim_section(obj):
     def array_section(obj):
         # "unique" id to expand/collapse the section
         data_id = "section-" + str(uuid.uuid4())
    -    collapsed = ""
    +    collapsed = "checked"
         variable = getattr(obj, "variable", obj)
         preview = escape(inline_variable_array_repr(variable, max_width=70))
         data_repr = short_data_repr_html(obj)
    diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py
    index ea636403318..9a210ad6fa3 100644
    --- a/xarray/tests/test_formatting_html.py
    +++ b/xarray/tests/test_formatting_html.py
    @@ -108,8 +108,8 @@ def test_summarize_attrs_with_unsafe_attr_name_and_value():
     def test_repr_of_dataarray(dataarray):
         formatted = fh.array_repr(dataarray)
         assert "dim_0" in formatted
    -    # has an expandable data section
    -    assert formatted.count("class='xr-array-in' type='checkbox' >") == 1
    +    # has an expanded data section
    +    assert formatted.count("class='xr-array-in' type='checkbox' checked>") == 1
          # coords and attrs don't have any items so they'll be disabled and collapsed
         assert (
             formatted.count("class='xr-section-summary-in' type='checkbox' disabled >") == 2
    
    From bdcfab524ef1c852abe6dabcfabc7292f058fddc Mon Sep 17 00:00:00 2001
    From: johnomotani 
    Date: Mon, 29 Jun 2020 20:36:24 +0100
    Subject: [PATCH 68/71] Support multiple dimensions in DataArray.argmin() and
     DataArray.argmax() methods (#3936)
    
    * DataArray.indices_min() and DataArray.indices_max() methods
    
    These return dicts of the indices of the minimum or maximum of a
    DataArray over several dimensions.
    
    * Update whats-new.rst and api.rst with indices_min(), indices_max()
    
    * Fix type checking in DataArray._unravel_argminmax()
    
    * Fix expected results for TestReduce3D.test_indices_max()
    
    * Respect global default for keep_attrs
    
    * Merge behaviour of indices_min/indices_max into argmin/argmax
    
    When argmin or argmax are called with a sequence for 'dim', they now
    return a dict with the indices for each dimension in dim.
    
    * Basic overload of argmin() and argmax() for Dataset
    
    If single dim is passed to Dataset.argmin() or Dataset.argmax(), then
    pass through to _argmin_base or _argmax_base. If a sequence is passed
    for dim, raise an exception, because the result for each DataArray would
    be a dict, which cannot be stored in a Dataset.
    
    * Update Variable and dask tests with _argmin_base, _argmax_base
    
    The basic numpy-style argmin() and argmax() methods were renamed when
    adding support for handling multiple dimensions in DataArray.argmin()
    and DataArray.argmax(). Variable.argmin() and Variable.argmax() are
    therefore renamed as Variable._argmin_base() and
    Variable._argmax_base().
    
    * Update api-hidden.rst with _argmin_base and _argmax_base
    
    * Explicitly defined class methods override injected methods
    
    If a method (such as 'argmin') has been explicitly defined on a class
    (so that hasattr(cls, "argmin")==True), then do not inject that method,
    as it would override the explicitly defined one. Instead inject a
    private method, prefixed by "_injected_" (such as '_injected_argmin'), so
    that the injected method is available to the explicitly defined one.
    
    Do not perform the hasattr check on binary ops, because this breaks
    some operations (e.g. addition between DataArray and int in
    test_dask.py).
    
    * Move StringAccessor back to bottom of DataArray class definition
    
    * Revert use of _argmin_base and _argmax_base
    
    Now not needed because of change to injection in ops.py.
    
    * Move implementation of argmin, argmax from DataArray to Variable
    
     Makes the use of argmin and argmax more general (they are available for
     Variable) and makes it straightforward for DataArray to wrap the Variable
     version.
    
    * Update tests for change to coordinates on result of argmin, argmax
    
    * Add 'out' keyword to argmin/argmax methods - allow numpy call signature
    
    When np.argmin(da) is called, numpy passes an 'out' keyword argument to
    argmin/argmax. Need to allow this argument to avoid errors (but an
    exception is thrown if out is not None).
    
    * Update and correct docstrings for argmin and argmax
    
    * Correct suggested replacement for da.argmin() and da.argmax()
    
    * Remove use of _injected_ methods in argmin/argmax
    
    * Fix typo in name of argminmax_func
    
    Co-Authored-By: keewis 
    
    * Mark argminmax argument to _unravel_argminmax as a string
    
    Co-Authored-By: keewis 
    
    * Hidden internal methods don't need to appear in docs
    
    * Basic docstrings for Dataset.argmin() and Dataset.argmax()
    
    * Set stacklevel for DeprecationWarning in argmin/argmax methods
    
    * Revert "Explicitly defined class methods override injected methods"
    
    This reverts commit 8caf2b8d07c14a2956a26b50ee08d83323c36058.
    
    * Revert "Add 'out' keyword to argmin/argmax methods - allow numpy call signature"
    
    This reverts commit ab480b5c88a059264086260e5090eb38b98aa7fa.
    
    * Remove argmin and argmax from ops.py
    
    * Use self.reduce() in Dataset.argmin() and Dataset.argmax()
    
    Replaces need for "_injected_argmin" and "_injected_argmax".
    
    * Whitespace after 'title' lines in docstrings
    
    * Remove tests of np.argmax() and np.argmin() functions from test_units.py
    
     Applying numpy functions to xarray objects is not necessarily expected
     to work, and the wrapping of argmin() and argmax() is broken by the
     xarray-specific interface of the argmin() and argmax() methods of
     Variable, DataArray and Dataset.
    
    * Clearer deprecation warnings in Dataset.argmin() and Dataset.argmax()
    
     Also, the previously suggested workaround was not correct. Remove the
     suggestion, as there is no workaround (but the removed behaviour is
     unlikely to be useful).
    
    * Add unravel_index to duck_array_ops, use in Variable._unravel_argminmax
    
    * Filter argmin/argmax DeprecationWarnings in tests
    
    * Correct test for exception for nan in test_argmax
    
    * Remove injected argmin and argmax methods from api-hidden.rst
    
    * flake8 fixes
    
    * Tidy up argmin/argmax following code review
    
    Co-authored-by: Deepak Cherian 
    
    * Remove filters for warnings from argmin/argmax from tests
    
    Pass an explicit axis or dim argument instead to avoid the warning.
    
    * Swap order of reduce_dims checks in Dataset.reduce()
    
    Prefer to pass reduce_dims=None when possible, including for variables
    with only one dimension. Avoids an error if an 'axis' keyword was
    passed.
    
    * revert the changes to Dataset.reduce
    
    * use dim instead of axis
    
    * use dimension instead of Ellipsis
    
    * Make passing 'dim=...' to Dataset.argmin() or Dataset.argmax() an error
    
    * Better docstrings for Dataset.argmin() and Dataset.argmax()
    
    * Update doc/whats-new.rst
    
    Co-authored-by: keewis 
    
    Co-authored-by: Stephan Hoyer 
    Co-authored-by: keewis 
    Co-authored-by: Deepak Cherian 
    Co-authored-by: Keewis 
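
     For orientation, a usage sketch mirroring the docstring examples added below
     (the array here is made up): passing a single dimension keeps the old
     integer-index result, while a sequence of dimensions returns a dict of index
     arrays that isel() can consume directly.

         import numpy as np
         import xarray as xr

         arr = xr.DataArray(np.random.rand(3, 4, 5), dims=("x", "y", "z"))

         # a single dim still returns an integer-index DataArray
         arr.argmin(dim="x")

         # a sequence of dims returns a dict of index arrays, one per dimension,
         # which isel() accepts directly to recover the minima
         idx = arr.argmin(dim=["x", "z"])
         xr.testing.assert_allclose(arr.isel(idx), arr.min(dim=("x", "z")))
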
    ---
     doc/api-hidden.rst             |  20 -
     doc/whats-new.rst              |   7 +
     xarray/core/dataarray.py       | 203 ++++++++
     xarray/core/dataset.py         | 126 +++++
     xarray/core/duck_array_ops.py  |   1 +
     xarray/core/ops.py             |   2 -
     xarray/core/variable.py        | 172 ++++++-
     xarray/tests/test_dataarray.py | 823 +++++++++++++++++++++++++++++++++
     xarray/tests/test_dataset.py   |   6 +
     xarray/tests/test_units.py     |  97 +++-
     xarray/tests/test_variable.py  |   2 +-
     11 files changed, 1415 insertions(+), 44 deletions(-)
    
    diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst
    index 5542e488143..efef4259b74 100644
    --- a/doc/api-hidden.rst
    +++ b/doc/api-hidden.rst
    @@ -41,8 +41,6 @@
     
        core.rolling.DatasetCoarsen.all
        core.rolling.DatasetCoarsen.any
    -   core.rolling.DatasetCoarsen.argmax
    -   core.rolling.DatasetCoarsen.argmin
        core.rolling.DatasetCoarsen.count
        core.rolling.DatasetCoarsen.max
        core.rolling.DatasetCoarsen.mean
    @@ -68,8 +66,6 @@
        core.groupby.DatasetGroupBy.where
        core.groupby.DatasetGroupBy.all
        core.groupby.DatasetGroupBy.any
    -   core.groupby.DatasetGroupBy.argmax
    -   core.groupby.DatasetGroupBy.argmin
        core.groupby.DatasetGroupBy.count
        core.groupby.DatasetGroupBy.max
        core.groupby.DatasetGroupBy.mean
    @@ -85,8 +81,6 @@
        core.resample.DatasetResample.all
        core.resample.DatasetResample.any
        core.resample.DatasetResample.apply
    -   core.resample.DatasetResample.argmax
    -   core.resample.DatasetResample.argmin
        core.resample.DatasetResample.assign
        core.resample.DatasetResample.assign_coords
        core.resample.DatasetResample.bfill
    @@ -110,8 +104,6 @@
        core.resample.DatasetResample.dims
        core.resample.DatasetResample.groups
     
    -   core.rolling.DatasetRolling.argmax
    -   core.rolling.DatasetRolling.argmin
        core.rolling.DatasetRolling.count
        core.rolling.DatasetRolling.max
        core.rolling.DatasetRolling.mean
    @@ -185,8 +177,6 @@
     
        core.rolling.DataArrayCoarsen.all
        core.rolling.DataArrayCoarsen.any
    -   core.rolling.DataArrayCoarsen.argmax
    -   core.rolling.DataArrayCoarsen.argmin
        core.rolling.DataArrayCoarsen.count
        core.rolling.DataArrayCoarsen.max
        core.rolling.DataArrayCoarsen.mean
    @@ -211,8 +201,6 @@
        core.groupby.DataArrayGroupBy.where
        core.groupby.DataArrayGroupBy.all
        core.groupby.DataArrayGroupBy.any
    -   core.groupby.DataArrayGroupBy.argmax
    -   core.groupby.DataArrayGroupBy.argmin
        core.groupby.DataArrayGroupBy.count
        core.groupby.DataArrayGroupBy.max
        core.groupby.DataArrayGroupBy.mean
    @@ -228,8 +216,6 @@
        core.resample.DataArrayResample.all
        core.resample.DataArrayResample.any
        core.resample.DataArrayResample.apply
    -   core.resample.DataArrayResample.argmax
    -   core.resample.DataArrayResample.argmin
        core.resample.DataArrayResample.assign_coords
        core.resample.DataArrayResample.bfill
        core.resample.DataArrayResample.count
    @@ -252,8 +238,6 @@
        core.resample.DataArrayResample.dims
        core.resample.DataArrayResample.groups
     
    -   core.rolling.DataArrayRolling.argmax
    -   core.rolling.DataArrayRolling.argmin
        core.rolling.DataArrayRolling.count
        core.rolling.DataArrayRolling.max
        core.rolling.DataArrayRolling.mean
    @@ -423,8 +407,6 @@
     
        IndexVariable.all
        IndexVariable.any
    -   IndexVariable.argmax
    -   IndexVariable.argmin
        IndexVariable.argsort
        IndexVariable.astype
        IndexVariable.broadcast_equals
    @@ -564,8 +546,6 @@
        CFTimeIndex.all
        CFTimeIndex.any
        CFTimeIndex.append
    -   CFTimeIndex.argmax
    -   CFTimeIndex.argmin
        CFTimeIndex.argsort
        CFTimeIndex.asof
        CFTimeIndex.asof_locs
    diff --git a/doc/whats-new.rst b/doc/whats-new.rst
    index c1440ec1108..086cddee0a0 100644
    --- a/doc/whats-new.rst
    +++ b/doc/whats-new.rst
    @@ -54,6 +54,13 @@ Enhancements
     
     New Features
     ~~~~~~~~~~~~
    +- :py:meth:`DataArray.argmin` and :py:meth:`DataArray.argmax` now support
    +  sequences of 'dim' arguments, and if a sequence is passed return a dict
    +  (which can be passed to :py:meth:`isel` to get the value of the minimum) of
    +  the indices for each dimension of the minimum or maximum of a DataArray.
    +  (:pull:`3936`)
    +  By `John Omotani `_, thanks to `Keisuke Fujii
    +  `_ for work in :pull:`1469`.
     - Added :py:meth:`xarray.infer_freq` for extending frequency inferring to CFTime indexes and data (:pull:`4033`).
       By `Pascal Bourgault `_.
     - ``chunks='auto'`` is now supported in the ``chunks`` argument of
    diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
    index b0df874953b..0ce76a5e23a 100644
    --- a/xarray/core/dataarray.py
    +++ b/xarray/core/dataarray.py
    @@ -3819,6 +3819,209 @@ def idxmax(
                 keep_attrs=keep_attrs,
             )
     
    +    def argmin(
    +        self,
    +        dim: Union[Hashable, Sequence[Hashable]] = None,
    +        axis: int = None,
    +        keep_attrs: bool = None,
    +        skipna: bool = None,
    +    ) -> Union["DataArray", Dict[Hashable, "DataArray"]]:
    +        """Index or indices of the minimum of the DataArray over one or more dimensions.
    +
     +        If a sequence is passed to 'dim', then the result is returned as a dict of
     +        DataArrays, which can be passed directly to isel(). If a single str is passed
     +        to 'dim' then a DataArray with dtype int is returned.
    +
    +        If there are multiple minima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : hashable, sequence of hashable or ..., optional
    +            The dimensions over which to find the minimum. By default, finds minimum over
    +            all dimensions - for now returning an int for backward compatibility, but
    +            this is deprecated, in future will return a dict with indices for all
    +            dimensions; to return a dict with all dimensions now, pass '...'.
    +        axis : int, optional
    +            Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : DataArray or dict of DataArray
    +
    +        See also
    +        --------
    +        Variable.argmin, DataArray.idxmin
    +
    +        Examples
    +        --------
    +        >>> array = xr.DataArray([0, 2, -1, 3], dims="x")
    +        >>> array.min()
    +        
    +        array(-1)
    +        >>> array.argmin()
    +        
    +        array(2)
    +        >>> array.argmin(...)
    +        {'x': 
    +        array(2)}
    +        >>> array.isel(array.argmin(...))
    +        array(-1)
    +
    +        >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]],
    +        ...                       [[1, 3, 2], [2, -5, 1], [2, 3, 1]]],
    +        ...                      dims=("x", "y", "z"))
    +        >>> array.min(dim="x")
    +        
    +        array([[ 1,  2,  1],
    +               [ 2, -5,  1],
    +               [ 2,  1,  1]])
    +        Dimensions without coordinates: y, z
    +        >>> array.argmin(dim="x")
    +        
    +        array([[1, 0, 0],
    +               [1, 1, 1],
    +               [0, 0, 1]])
    +        Dimensions without coordinates: y, z
    +        >>> array.argmin(dim=["x"])
    +        {'x': 
    +        array([[1, 0, 0],
    +               [1, 1, 1],
    +               [0, 0, 1]])
    +        Dimensions without coordinates: y, z}
    +        >>> array.min(dim=("x", "z"))
    +        
    +        array([ 1, -5,  1])
    +        Dimensions without coordinates: y
    +        >>> array.argmin(dim=["x", "z"])
    +        {'x': 
    +        array([0, 1, 0])
    +        Dimensions without coordinates: y, 'z': 
    +        array([2, 1, 1])
    +        Dimensions without coordinates: y}
    +        >>> array.isel(array.argmin(dim=["x", "z"]))
    +        
    +        array([ 1, -5,  1])
    +        Dimensions without coordinates: y
    +        """
    +        result = self.variable.argmin(dim, axis, keep_attrs, skipna)
    +        if isinstance(result, dict):
    +            return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()}
    +        else:
    +            return self._replace_maybe_drop_dims(result)
    +
    +    def argmax(
    +        self,
    +        dim: Union[Hashable, Sequence[Hashable]] = None,
    +        axis: int = None,
    +        keep_attrs: bool = None,
    +        skipna: bool = None,
    +    ) -> Union["DataArray", Dict[Hashable, "DataArray"]]:
    +        """Index or indices of the maximum of the DataArray over one or more dimensions.
    +
     +        If a sequence is passed to 'dim', then the result is returned as a dict of
     +        DataArrays, which can be passed directly to isel(). If a single str is passed
     +        to 'dim' then a DataArray with dtype int is returned.
    +
    +        If there are multiple maxima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : hashable, sequence of hashable or ..., optional
    +            The dimensions over which to find the maximum. By default, finds maximum over
    +            all dimensions - for now returning an int for backward compatibility, but
    +            this is deprecated, in future will return a dict with indices for all
    +            dimensions; to return a dict with all dimensions now, pass '...'.
    +        axis : int, optional
     +            Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : DataArray or dict of DataArray
    +
    +        See also
    +        --------
    +        Variable.argmax, DataArray.idxmax
    +
    +        Examples
    +        --------
    +        >>> array = xr.DataArray([0, 2, -1, 3], dims="x")
    +        >>> array.max()
    +        
    +        array(3)
    +        >>> array.argmax()
    +        
    +        array(3)
    +        >>> array.argmax(...)
    +        {'x': 
    +        array(3)}
    +        >>> array.isel(array.argmax(...))
    +        
    +        array(3)
    +
    +        >>> array = xr.DataArray([[[3, 2, 1], [3, 1, 2], [2, 1, 3]],
    +        ...                       [[1, 3, 2], [2, 5, 1], [2, 3, 1]]],
    +        ...                      dims=("x", "y", "z"))
    +        >>> array.max(dim="x")
    +        
    +        array([[3, 3, 2],
    +               [3, 5, 2],
    +               [2, 3, 3]])
    +        Dimensions without coordinates: y, z
    +        >>> array.argmax(dim="x")
    +        
    +        array([[0, 1, 1],
    +               [0, 1, 0],
    +               [0, 1, 0]])
    +        Dimensions without coordinates: y, z
    +        >>> array.argmax(dim=["x"])
    +        {'x': 
    +        array([[0, 1, 1],
    +               [0, 1, 0],
    +               [0, 1, 0]])
    +        Dimensions without coordinates: y, z}
    +        >>> array.max(dim=("x", "z"))
    +        
    +        array([3, 5, 3])
    +        Dimensions without coordinates: y
    +        >>> array.argmax(dim=["x", "z"])
    +        {'x': 
    +        array([0, 1, 0])
    +        Dimensions without coordinates: y, 'z': 
    +        array([0, 1, 2])
    +        Dimensions without coordinates: y}
    +        >>> array.isel(array.argmax(dim=["x", "z"]))
    +        
    +        array([3, 5, 3])
    +        Dimensions without coordinates: y
    +        """
    +        result = self.variable.argmax(dim, axis, keep_attrs, skipna)
    +        if isinstance(result, dict):
    +            return {k: self._replace_maybe_drop_dims(v) for k, v in result.items()}
    +        else:
    +            return self._replace_maybe_drop_dims(result)
    +
         # this needs to be at the end, or mypy will confuse with `str`
         # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
         str = utils.UncachedAccessor(StringAccessor)
    diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
    index a024324bcb1..b46b1d6dce0 100644
    --- a/xarray/core/dataset.py
    +++ b/xarray/core/dataset.py
    @@ -6368,5 +6368,131 @@ def idxmax(
                 )
             )
     
    +    def argmin(self, dim=None, axis=None, **kwargs):
    +        """Indices of the minima of the member variables.
    +
    +        If there are multiple minima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : str, optional
    +            The dimension over which to find the minimum. By default, finds minimum over
    +            all dimensions - for now returning an int for backward compatibility, but
    +            this is deprecated, in future will be an error, since DataArray.argmin will
    +            return a dict with indices for all dimensions, which does not make sense for
    +            a Dataset.
    +        axis : int, optional
    +            Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : Dataset
    +
    +        See also
    +        --------
    +        DataArray.argmin
    +
     +        """
    +        if dim is None and axis is None:
    +            warnings.warn(
    +                "Once the behaviour of DataArray.argmin() and Variable.argmin() with "
    +                "neither dim nor axis argument changes to return a dict of indices of "
    +                "each dimension, for consistency it will be an error to call "
    +                "Dataset.argmin() with no argument, since we don't return a dict of "
    +                "Datasets.",
    +                DeprecationWarning,
    +                stacklevel=2,
    +            )
    +        if (
    +            dim is None
    +            or axis is not None
    +            or (not isinstance(dim, Sequence) and dim is not ...)
    +            or isinstance(dim, str)
    +        ):
    +            # Return int index if single dimension is passed, and is not part of a
    +            # sequence
    +            argmin_func = getattr(duck_array_ops, "argmin")
    +            return self.reduce(argmin_func, dim=dim, axis=axis, **kwargs)
    +        else:
    +            raise ValueError(
    +                "When dim is a sequence or ..., DataArray.argmin() returns a dict. "
    +                "dicts cannot be contained in a Dataset, so cannot call "
    +                "Dataset.argmin() with a sequence or ... for dim"
    +            )
    +
    +    def argmax(self, dim=None, axis=None, **kwargs):
    +        """Indices of the maxima of the member variables.
    +
    +        If there are multiple maxima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : str, optional
    +            The dimension over which to find the maximum. By default, finds maximum over
    +            all dimensions - for now returning an int for backward compatibility, but
    +            this is deprecated, in future will be an error, since DataArray.argmax will
    +            return a dict with indices for all dimensions, which does not make sense for
    +            a Dataset.
    +        axis : int, optional
    +            Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : Dataset
    +
    +        See also
    +        --------
    +        DataArray.argmax
    +
     +        """
    +        if dim is None and axis is None:
    +            warnings.warn(
    +                "Once the behaviour of DataArray.argmax() and Variable.argmax() with "
    +                "neither dim nor axis argument changes to return a dict of indices of "
    +                "each dimension, for consistency it will be an error to call "
    +                "Dataset.argmax() with no argument, since we don't return a dict of "
    +                "Datasets.",
    +                DeprecationWarning,
    +                stacklevel=2,
    +            )
    +        if (
    +            dim is None
    +            or axis is not None
    +            or (not isinstance(dim, Sequence) and dim is not ...)
    +            or isinstance(dim, str)
    +        ):
    +            # Return int index if single dimension is passed, and is not part of a
    +            # sequence
    +            argmax_func = getattr(duck_array_ops, "argmax")
    +            return self.reduce(argmax_func, dim=dim, axis=axis, **kwargs)
    +        else:
    +            raise ValueError(
    +                "When dim is a sequence or ..., DataArray.argmin() returns a dict. "
    +                "dicts cannot be contained in a Dataset, so cannot call "
    +                "Dataset.argmin() with a sequence or ... for dim"
    +            )
    +
     
     ops.inject_all_ops_and_reduce_methods(Dataset, array_only=False)
    diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py
    index 76719699168..df579d23544 100644
    --- a/xarray/core/duck_array_ops.py
    +++ b/xarray/core/duck_array_ops.py
    @@ -359,6 +359,7 @@ def f(values, axis=None, skipna=None, **kwargs):
     cumprod_1d.numeric_only = True
     cumsum_1d = _create_nan_agg_method("cumsum")
     cumsum_1d.numeric_only = True
    +unravel_index = _dask_or_eager_func("unravel_index")
     
     
     _mean = _create_nan_agg_method("mean")
    diff --git a/xarray/core/ops.py b/xarray/core/ops.py
    index b789f93b4f1..d4aeea37aad 100644
    --- a/xarray/core/ops.py
    +++ b/xarray/core/ops.py
    @@ -47,8 +47,6 @@
     # methods which remove an axis
     REDUCE_METHODS = ["all", "any"]
     NAN_REDUCE_METHODS = [
    -    "argmax",
    -    "argmin",
         "max",
         "min",
         "mean",
    diff --git a/xarray/core/variable.py b/xarray/core/variable.py
    index e19132b1b06..c505c749557 100644
    --- a/xarray/core/variable.py
    +++ b/xarray/core/variable.py
    @@ -6,7 +6,17 @@
     from collections import defaultdict
     from datetime import timedelta
     from distutils.version import LooseVersion
    -from typing import Any, Dict, Hashable, Mapping, Tuple, TypeVar, Union
    +from typing import (
    +    Any,
    +    Dict,
    +    Hashable,
    +    Mapping,
    +    Optional,
    +    Sequence,
    +    Tuple,
    +    TypeVar,
    +    Union,
    +)
     
     import numpy as np
     import pandas as pd
    @@ -2069,6 +2079,166 @@ def _to_numeric(self, offset=None, datetime_unit=None, dtype=float):
             )
             return type(self)(self.dims, numeric_array, self._attrs)
     
    +    def _unravel_argminmax(
    +        self,
    +        argminmax: str,
    +        dim: Union[Hashable, Sequence[Hashable], None],
    +        axis: Union[int, None],
    +        keep_attrs: Optional[bool],
    +        skipna: Optional[bool],
    +    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
    +        """Apply argmin or argmax over one or more dimensions, returning the result as a
    +        dict of DataArray that can be passed directly to isel.
    +        """
    +        if dim is None and axis is None:
    +            warnings.warn(
    +                "Behaviour of argmin/argmax with neither dim nor axis argument will "
    +                "change to return a dict of indices of each dimension. To get a "
    +                "single, flat index, please use np.argmin(da.data) or "
    +                "np.argmax(da.data) instead of da.argmin() or da.argmax().",
    +                DeprecationWarning,
    +                stacklevel=3,
    +            )
    +
    +        argminmax_func = getattr(duck_array_ops, argminmax)
    +
    +        if dim is ...:
    +            # In future, should do this also when (dim is None and axis is None)
    +            dim = self.dims
    +        if (
    +            dim is None
    +            or axis is not None
    +            or not isinstance(dim, Sequence)
    +            or isinstance(dim, str)
    +        ):
    +            # Return int index if single dimension is passed, and is not part of a
    +            # sequence
    +            return self.reduce(
    +                argminmax_func, dim=dim, axis=axis, keep_attrs=keep_attrs, skipna=skipna
    +            )
    +
    +        # Get a name for the new dimension that does not conflict with any existing
    +        # dimension
    +        newdimname = "_unravel_argminmax_dim_0"
    +        count = 1
    +        while newdimname in self.dims:
    +            newdimname = "_unravel_argminmax_dim_{}".format(count)
    +            count += 1
    +
    +        stacked = self.stack({newdimname: dim})
    +
    +        result_dims = stacked.dims[:-1]
    +        reduce_shape = tuple(self.sizes[d] for d in dim)
    +
    +        result_flat_indices = stacked.reduce(argminmax_func, axis=-1, skipna=skipna)
    +
    +        result_unravelled_indices = duck_array_ops.unravel_index(
    +            result_flat_indices.data, reduce_shape
    +        )
    +
    +        result = {
    +            d: Variable(dims=result_dims, data=i)
    +            for d, i in zip(dim, result_unravelled_indices)
    +        }
    +
    +        if keep_attrs is None:
    +            keep_attrs = _get_keep_attrs(default=False)
    +        if keep_attrs:
    +            for v in result.values():
    +                v.attrs = self.attrs
    +
    +        return result
    +
    +    def argmin(
    +        self,
    +        dim: Union[Hashable, Sequence[Hashable]] = None,
    +        axis: int = None,
    +        keep_attrs: bool = None,
    +        skipna: bool = None,
    +    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
    +        """Index or indices of the minimum of the Variable over one or more dimensions.
    +        If a sequence is passed to 'dim', then result returned as dict of Variables,
    +        which can be passed directly to isel(). If a single str is passed to 'dim' then
    +        returns a Variable with dtype int.
    +
    +        If there are multiple minima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : hashable, sequence of hashable or ..., optional
     +            The dimensions over which to find the minimum. By default, finds the minimum
     +            over all dimensions; for now this returns an int for backward compatibility,
     +            but this behaviour is deprecated and will change to return a dict with the
     +            indices for all dimensions. To get a dict with all dimensions now, pass '...'.
    +        axis : int, optional
    +            Axis over which to apply `argmin`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : Variable or dict of Variable
    +
    +        See also
    +        --------
    +        DataArray.argmin, DataArray.idxmin
    +        """
    +        return self._unravel_argminmax("argmin", dim, axis, keep_attrs, skipna)
    +
    +    def argmax(
    +        self,
    +        dim: Union[Hashable, Sequence[Hashable]] = None,
    +        axis: int = None,
    +        keep_attrs: bool = None,
    +        skipna: bool = None,
    +    ) -> Union["Variable", Dict[Hashable, "Variable"]]:
    +        """Index or indices of the maximum of the Variable over one or more dimensions.
     +        If a sequence is passed to 'dim', the result is returned as a dict of Variables,
     +        which can be passed directly to isel(). If a single str is passed to 'dim', a
     +        Variable with dtype int is returned.
    +
    +        If there are multiple maxima, the indices of the first one found will be
    +        returned.
    +
    +        Parameters
    +        ----------
    +        dim : hashable, sequence of hashable or ..., optional
     +            The dimensions over which to find the maximum. By default, finds the maximum
     +            over all dimensions; for now this returns an int for backward compatibility,
     +            but this behaviour is deprecated and will change to return a dict with the
     +            indices for all dimensions. To get a dict with all dimensions now, pass '...'.
    +        axis : int, optional
     +            Axis over which to apply `argmax`. Only one of the 'dim' and 'axis' arguments
    +            can be supplied.
    +        keep_attrs : bool, optional
    +            If True, the attributes (`attrs`) will be copied from the original
    +            object to the new one.  If False (default), the new object will be
    +            returned without attributes.
    +        skipna : bool, optional
    +            If True, skip missing values (as marked by NaN). By default, only
    +            skips missing values for float dtypes; other dtypes either do not
    +            have a sentinel missing value (int) or skipna=True has not been
    +            implemented (object, datetime64 or timedelta64).
    +
    +        Returns
    +        -------
    +        result : Variable or dict of Variable
    +
    +        See also
    +        --------
    +        DataArray.argmax, DataArray.idxmax
    +        """
    +        return self._unravel_argminmax("argmax", dim, axis, keep_attrs, skipna)
    +
     
     ops.inject_all_ops_and_reduce_methods(Variable)
     
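     As a usage note for the API added above: a minimal sketch of how the new argmin/argmax
     behave once this patch series is applied, assuming the DataArray-level methods mirror the
     Variable methods shown here; the array values and dimension names are illustrative only.
     
         import numpy as np
         import xarray as xr
     
         da = xr.DataArray(np.random.RandomState(0).randn(3, 4), dims=("y", "x"))
     
         # A single dimension still yields integer indices along that dimension.
         ix = da.argmin(dim="x")
     
         # A sequence of dimensions (or ...) yields a dict with one index array per
         # reduced dimension, which can be passed straight to isel().
         indices = da.argmin(dim=["x", "y"])
         smallest = da.isel(indices)  # the overall minimum value
     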
    diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
    index d942667a4c7..793090cc122 100644
    --- a/xarray/tests/test_dataarray.py
    +++ b/xarray/tests/test_dataarray.py
    @@ -4493,6 +4493,9 @@ def test_max(self, x, minindex, maxindex, nanindex):
     
             assert_identical(result2, expected2)
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
         def test_argmin(self, x, minindex, maxindex, nanindex):
             ar = xr.DataArray(
                 x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    @@ -4522,6 +4525,9 @@ def test_argmin(self, x, minindex, maxindex, nanindex):
     
             assert_identical(result2, expected2)
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
         def test_argmax(self, x, minindex, maxindex, nanindex):
             ar = xr.DataArray(
                 x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    @@ -4763,6 +4769,78 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask):
             result7 = ar0.idxmax(fill_value=-1j)
             assert_identical(result7, expected7)
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
    +    def test_argmin_dim(self, x, minindex, maxindex, nanindex):
    +        ar = xr.DataArray(
    +            x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    +        )
    +        indarr = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"])
    +
    +        if np.isnan(minindex):
    +            with pytest.raises(ValueError):
    +                ar.argmin()
    +            return
    +
    +        expected0 = {"x": indarr[minindex]}
    +        result0 = ar.argmin(...)
    +        for key in expected0:
    +            assert_identical(result0[key], expected0[key])
    +
    +        result1 = ar.argmin(..., keep_attrs=True)
    +        expected1 = deepcopy(expected0)
    +        for da in expected1.values():
    +            da.attrs = self.attrs
    +        for key in expected1:
    +            assert_identical(result1[key], expected1[key])
    +
    +        result2 = ar.argmin(..., skipna=False)
    +        if nanindex is not None and ar.dtype.kind != "O":
    +            expected2 = {"x": indarr.isel(x=nanindex, drop=True)}
    +            expected2["x"].attrs = {}
    +        else:
    +            expected2 = expected0
    +
    +        for key in expected2:
    +            assert_identical(result2[key], expected2[key])
    +
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
    +    def test_argmax_dim(self, x, minindex, maxindex, nanindex):
    +        ar = xr.DataArray(
    +            x, dims=["x"], coords={"x": np.arange(x.size) * 4}, attrs=self.attrs
    +        )
    +        indarr = xr.DataArray(np.arange(x.size, dtype=np.intp), dims=["x"])
    +
    +        if np.isnan(maxindex):
    +            with pytest.raises(ValueError):
    +                ar.argmax()
    +            return
    +
    +        expected0 = {"x": indarr[maxindex]}
    +        result0 = ar.argmax(...)
    +        for key in expected0:
    +            assert_identical(result0[key], expected0[key])
    +
    +        result1 = ar.argmax(..., keep_attrs=True)
    +        expected1 = deepcopy(expected0)
    +        for da in expected1.values():
    +            da.attrs = self.attrs
    +        for key in expected1:
    +            assert_identical(result1[key], expected1[key])
    +
    +        result2 = ar.argmax(..., skipna=False)
    +        if nanindex is not None and ar.dtype.kind != "O":
    +            expected2 = {"x": indarr.isel(x=nanindex, drop=True)}
    +            expected2["x"].attrs = {}
    +        else:
    +            expected2 = expected0
    +
    +        for key in expected2:
    +            assert_identical(result2[key], expected2[key])
    +
     
     @pytest.mark.parametrize(
         "x, minindex, maxindex, nanindex",
    @@ -5256,6 +5334,751 @@ def test_idxmax(self, x, minindex, maxindex, nanindex, use_dask):
                 result7 = ar0.idxmax(dim="x", fill_value=-5j)
             assert_identical(result7, expected7)
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
    +    def test_argmin_dim(self, x, minindex, maxindex, nanindex):
    +        ar = xr.DataArray(
    +            x,
    +            dims=["y", "x"],
    +            coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
    +            attrs=self.attrs,
    +        )
    +        indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1])
    +        indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords)
    +
    +        if np.isnan(minindex).any():
    +            with pytest.raises(ValueError):
    +                ar.argmin(dim="x")
    +            return
    +
    +        expected0 = [
    +            indarr.isel(y=yi).isel(x=indi, drop=True)
    +            for yi, indi in enumerate(minindex)
    +        ]
    +        expected0 = {"x": xr.concat(expected0, dim="y")}
    +
    +        result0 = ar.argmin(dim=["x"])
    +        for key in expected0:
    +            assert_identical(result0[key], expected0[key])
    +
    +        result1 = ar.argmin(dim=["x"], keep_attrs=True)
    +        expected1 = deepcopy(expected0)
    +        expected1["x"].attrs = self.attrs
    +        for key in expected1:
    +            assert_identical(result1[key], expected1[key])
    +
    +        minindex = [
    +            x if y is None or ar.dtype.kind == "O" else y
    +            for x, y in zip(minindex, nanindex)
    +        ]
    +        expected2 = [
    +            indarr.isel(y=yi).isel(x=indi, drop=True)
    +            for yi, indi in enumerate(minindex)
    +        ]
    +        expected2 = {"x": xr.concat(expected2, dim="y")}
    +        expected2["x"].attrs = {}
    +
    +        result2 = ar.argmin(dim=["x"], skipna=False)
    +
    +        for key in expected2:
    +            assert_identical(result2[key], expected2[key])
    +
    +        result3 = ar.argmin(...)
    +        min_xind = ar.isel(expected0).argmin()
    +        expected3 = {
    +            "y": DataArray(min_xind),
    +            "x": DataArray(minindex[min_xind.item()]),
    +        }
    +
    +        for key in expected3:
    +            assert_identical(result3[key], expected3[key])
    +
    +    @pytest.mark.filterwarnings(
    +        "ignore:Behaviour of argmin/argmax with neither dim nor :DeprecationWarning"
    +    )
    +    def test_argmax_dim(self, x, minindex, maxindex, nanindex):
    +        ar = xr.DataArray(
    +            x,
    +            dims=["y", "x"],
    +            coords={"x": np.arange(x.shape[1]) * 4, "y": 1 - np.arange(x.shape[0])},
    +            attrs=self.attrs,
    +        )
    +        indarr = np.tile(np.arange(x.shape[1], dtype=np.intp), [x.shape[0], 1])
    +        indarr = xr.DataArray(indarr, dims=ar.dims, coords=ar.coords)
    +
    +        if np.isnan(maxindex).any():
    +            with pytest.raises(ValueError):
    +                ar.argmax(dim="x")
    +            return
    +
    +        expected0 = [
    +            indarr.isel(y=yi).isel(x=indi, drop=True)
    +            for yi, indi in enumerate(maxindex)
    +        ]
    +        expected0 = {"x": xr.concat(expected0, dim="y")}
    +
    +        result0 = ar.argmax(dim=["x"])
    +        for key in expected0:
    +            assert_identical(result0[key], expected0[key])
    +
    +        result1 = ar.argmax(dim=["x"], keep_attrs=True)
    +        expected1 = deepcopy(expected0)
    +        expected1["x"].attrs = self.attrs
    +        for key in expected1:
    +            assert_identical(result1[key], expected1[key])
    +
    +        maxindex = [
    +            x if y is None or ar.dtype.kind == "O" else y
    +            for x, y in zip(maxindex, nanindex)
    +        ]
    +        expected2 = [
    +            indarr.isel(y=yi).isel(x=indi, drop=True)
    +            for yi, indi in enumerate(maxindex)
    +        ]
    +        expected2 = {"x": xr.concat(expected2, dim="y")}
    +        expected2["x"].attrs = {}
    +
    +        result2 = ar.argmax(dim=["x"], skipna=False)
    +
    +        for key in expected2:
    +            assert_identical(result2[key], expected2[key])
    +
    +        result3 = ar.argmax(...)
    +        max_xind = ar.isel(expected0).argmax()
    +        expected3 = {
    +            "y": DataArray(max_xind),
    +            "x": DataArray(maxindex[max_xind.item()]),
    +        }
    +
    +        for key in expected3:
    +            assert_identical(result3[key], expected3[key])
    +
    +
    +@pytest.mark.parametrize(
    +    "x, minindices_x, minindices_y, minindices_z, minindices_xy, "
    +    "minindices_xz, minindices_yz, minindices_xyz, maxindices_x, "
    +    "maxindices_y, maxindices_z, maxindices_xy, maxindices_xz, maxindices_yz, "
    +    "maxindices_xyz, nanindices_x, nanindices_y, nanindices_z, nanindices_xy, "
    +    "nanindices_xz, nanindices_yz, nanindices_xyz",
    +    [
    +        (
    +            np.array(
    +                [
    +                    [[0, 1, 2, 0], [-2, -4, 2, 0]],
    +                    [[1, 1, 1, 1], [1, 1, 1, 1]],
    +                    [[0, 0, -10, 5], [20, 0, 0, 0]],
    +                ]
    +            ),
    +            {"x": np.array([[0, 2, 2, 0], [0, 0, 2, 0]])},
    +            {"y": np.array([[1, 1, 0, 0], [0, 0, 0, 0], [0, 0, 0, 1]])},
    +            {"z": np.array([[0, 1], [0, 0], [2, 1]])},
    +            {"x": np.array([0, 0, 2, 0]), "y": np.array([1, 1, 0, 0])},
    +            {"x": np.array([2, 0]), "z": np.array([2, 1])},
    +            {"y": np.array([1, 0, 0]), "z": np.array([1, 0, 2])},
    +            {"x": np.array(2), "y": np.array(0), "z": np.array(2)},
    +            {"x": np.array([[1, 0, 0, 2], [2, 1, 0, 1]])},
    +            {"y": np.array([[0, 0, 0, 0], [0, 0, 0, 0], [1, 0, 1, 0]])},
    +            {"z": np.array([[2, 2], [0, 0], [3, 0]])},
    +            {"x": np.array([2, 0, 0, 2]), "y": np.array([1, 0, 0, 0])},
    +            {"x": np.array([2, 2]), "z": np.array([3, 0])},
    +            {"y": np.array([0, 0, 1]), "z": np.array([2, 0, 0])},
    +            {"x": np.array(2), "y": np.array(1), "z": np.array(0)},
    +            {"x": np.array([[None, None, None, None], [None, None, None, None]])},
    +            {
    +                "y": np.array(
    +                    [
    +                        [None, None, None, None],
    +                        [None, None, None, None],
    +                        [None, None, None, None],
    +                    ]
    +                )
    +            },
    +            {"z": np.array([[None, None], [None, None], [None, None]])},
    +            {
    +                "x": np.array([None, None, None, None]),
    +                "y": np.array([None, None, None, None]),
    +            },
    +            {"x": np.array([None, None]), "z": np.array([None, None])},
    +            {"y": np.array([None, None, None]), "z": np.array([None, None, None])},
    +            {"x": np.array(None), "y": np.array(None), "z": np.array(None)},
    +        ),
    +        (
    +            np.array(
    +                [
    +                    [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]],
    +                    [[-4.0, np.NaN, 2.0, np.NaN], [-2.0, -4.0, 2.0, 0.0]],
    +                    [[np.NaN] * 4, [np.NaN] * 4],
    +                ]
    +            ),
    +            {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])},
    +            {
    +                "y": np.array(
    +                    [[1, 1, 0, 0], [0, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]]
    +                )
    +            },
    +            {"z": np.array([[3, 1], [0, 1], [np.NaN, np.NaN]])},
    +            {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])},
    +            {"x": np.array([1, 0]), "z": np.array([0, 1])},
    +            {"y": np.array([1, 0, np.NaN]), "z": np.array([1, 0, np.NaN])},
    +            {"x": np.array(0), "y": np.array(1), "z": np.array(1)},
    +            {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])},
    +            {
    +                "y": np.array(
    +                    [[0, 0, 0, 0], [1, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]]
    +                )
    +            },
    +            {"z": np.array([[0, 2], [2, 2], [np.NaN, np.NaN]])},
    +            {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])},
    +            {"x": np.array([0, 0]), "z": np.array([2, 2])},
    +            {"y": np.array([0, 0, np.NaN]), "z": np.array([0, 2, np.NaN])},
    +            {"x": np.array(0), "y": np.array(0), "z": np.array(0)},
    +            {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])},
    +            {
    +                "y": np.array(
    +                    [[None, None, None, None], [None, 0, None, 0], [0, 0, 0, 0]]
    +                )
    +            },
    +            {"z": np.array([[None, None], [1, None], [0, 0]])},
    +            {"x": np.array([2, 1, 2, 1]), "y": np.array([0, 0, 0, 0])},
    +            {"x": np.array([1, 2]), "z": np.array([1, 0])},
    +            {"y": np.array([None, 0, 0]), "z": np.array([None, 1, 0])},
    +            {"x": np.array(1), "y": np.array(0), "z": np.array(1)},
    +        ),
    +        (
    +            np.array(
    +                [
    +                    [[2.0, 1.0, 2.0, 0.0], [-2.0, -4.0, 2.0, 0.0]],
    +                    [[-4.0, np.NaN, 2.0, np.NaN], [-2.0, -4.0, 2.0, 0.0]],
    +                    [[np.NaN] * 4, [np.NaN] * 4],
    +                ]
    +            ).astype("object"),
    +            {"x": np.array([[1, 0, 0, 0], [0, 0, 0, 0]])},
    +            {
    +                "y": np.array(
    +                    [[1, 1, 0, 0], [0, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]]
    +                )
    +            },
    +            {"z": np.array([[3, 1], [0, 1], [np.NaN, np.NaN]])},
    +            {"x": np.array([1, 0, 0, 0]), "y": np.array([0, 1, 0, 0])},
    +            {"x": np.array([1, 0]), "z": np.array([0, 1])},
    +            {"y": np.array([1, 0, np.NaN]), "z": np.array([1, 0, np.NaN])},
    +            {"x": np.array(0), "y": np.array(1), "z": np.array(1)},
    +            {"x": np.array([[0, 0, 0, 0], [0, 0, 0, 0]])},
    +            {
    +                "y": np.array(
    +                    [[0, 0, 0, 0], [1, 1, 0, 1], [np.NaN, np.NaN, np.NaN, np.NaN]]
    +                )
    +            },
    +            {"z": np.array([[0, 2], [2, 2], [np.NaN, np.NaN]])},
    +            {"x": np.array([0, 0, 0, 0]), "y": np.array([0, 0, 0, 0])},
    +            {"x": np.array([0, 0]), "z": np.array([2, 2])},
    +            {"y": np.array([0, 0, np.NaN]), "z": np.array([0, 2, np.NaN])},
    +            {"x": np.array(0), "y": np.array(0), "z": np.array(0)},
    +            {"x": np.array([[2, 1, 2, 1], [2, 2, 2, 2]])},
    +            {
    +                "y": np.array(
    +                    [[None, None, None, None], [None, 0, None, 0], [0, 0, 0, 0]]
    +                )
    +            },
    +            {"z": np.array([[None, None], [1, None], [0, 0]])},
    +            {"x": np.array([2, 1, 2, 1]), "y": np.array([0, 0, 0, 0])},
    +            {"x": np.array([1, 2]), "z": np.array([1, 0])},
    +            {"y": np.array([None, 0, 0]), "z": np.array([None, 1, 0])},
    +            {"x": np.array(1), "y": np.array(0), "z": np.array(1)},
    +        ),
    +        (
    +            np.array(
    +                [
    +                    [["2015-12-31", "2020-01-02"], ["2020-01-01", "2016-01-01"]],
    +                    [["2020-01-02", "2020-01-02"], ["2020-01-02", "2020-01-02"]],
    +                    [["1900-01-01", "1-02-03"], ["1900-01-02", "1-02-03"]],
    +                ],
    +                dtype="datetime64[ns]",
    +            ),
    +            {"x": np.array([[2, 2], [2, 2]])},
    +            {"y": np.array([[0, 1], [0, 0], [0, 0]])},
    +            {"z": np.array([[0, 1], [0, 0], [1, 1]])},
    +            {"x": np.array([2, 2]), "y": np.array([0, 0])},
    +            {"x": np.array([2, 2]), "z": np.array([1, 1])},
    +            {"y": np.array([0, 0, 0]), "z": np.array([0, 0, 1])},
    +            {"x": np.array(2), "y": np.array(0), "z": np.array(1)},
    +            {"x": np.array([[1, 0], [1, 1]])},
    +            {"y": np.array([[1, 0], [0, 0], [1, 0]])},
    +            {"z": np.array([[1, 0], [0, 0], [0, 0]])},
    +            {"x": np.array([1, 0]), "y": np.array([0, 0])},
    +            {"x": np.array([0, 1]), "z": np.array([1, 0])},
    +            {"y": np.array([0, 0, 1]), "z": np.array([1, 0, 0])},
    +            {"x": np.array(0), "y": np.array(0), "z": np.array(1)},
    +            {"x": np.array([[None, None], [None, None]])},
    +            {"y": np.array([[None, None], [None, None], [None, None]])},
    +            {"z": np.array([[None, None], [None, None], [None, None]])},
    +            {"x": np.array([None, None]), "y": np.array([None, None])},
    +            {"x": np.array([None, None]), "z": np.array([None, None])},
    +            {"y": np.array([None, None, None]), "z": np.array([None, None, None])},
    +            {"x": np.array(None), "y": np.array(None), "z": np.array(None)},
    +        ),
    +    ],
    +)
    +class TestReduce3D(TestReduce):
    +    def test_argmin_dim(
    +        self,
    +        x,
    +        minindices_x,
    +        minindices_y,
    +        minindices_z,
    +        minindices_xy,
    +        minindices_xz,
    +        minindices_yz,
    +        minindices_xyz,
    +        maxindices_x,
    +        maxindices_y,
    +        maxindices_z,
    +        maxindices_xy,
    +        maxindices_xz,
    +        maxindices_yz,
    +        maxindices_xyz,
    +        nanindices_x,
    +        nanindices_y,
    +        nanindices_z,
    +        nanindices_xy,
    +        nanindices_xz,
    +        nanindices_yz,
    +        nanindices_xyz,
    +    ):
    +
    +        ar = xr.DataArray(
    +            x,
    +            dims=["x", "y", "z"],
    +            coords={
    +                "x": np.arange(x.shape[0]) * 4,
    +                "y": 1 - np.arange(x.shape[1]),
    +                "z": 2 + 3 * np.arange(x.shape[2]),
    +            },
    +            attrs=self.attrs,
    +        )
    +        xindarr = np.tile(
    +            np.arange(x.shape[0], dtype=np.intp)[:, np.newaxis, np.newaxis],
    +            [1, x.shape[1], x.shape[2]],
    +        )
    +        xindarr = xr.DataArray(xindarr, dims=ar.dims, coords=ar.coords)
    +        yindarr = np.tile(
    +            np.arange(x.shape[1], dtype=np.intp)[np.newaxis, :, np.newaxis],
    +            [x.shape[0], 1, x.shape[2]],
    +        )
    +        yindarr = xr.DataArray(yindarr, dims=ar.dims, coords=ar.coords)
    +        zindarr = np.tile(
    +            np.arange(x.shape[2], dtype=np.intp)[np.newaxis, np.newaxis, :],
    +            [x.shape[0], x.shape[1], 1],
    +        )
    +        zindarr = xr.DataArray(zindarr, dims=ar.dims, coords=ar.coords)
    +
    +        for inds in [
    +            minindices_x,
    +            minindices_y,
    +            minindices_z,
    +            minindices_xy,
    +            minindices_xz,
    +            minindices_yz,
    +            minindices_xyz,
    +        ]:
    +            if np.array([np.isnan(i) for i in inds.values()]).any():
    +                with pytest.raises(ValueError):
    +                    ar.argmin(dim=[d for d in inds])
    +                return
    +
    +        result0 = ar.argmin(dim=["x"])
    +        expected0 = {
    +            key: xr.DataArray(value, dims=("y", "z"))
    +            for key, value in minindices_x.items()
    +        }
    +        for key in expected0:
    +            assert_identical(result0[key].drop_vars(["y", "z"]), expected0[key])
    +
    +        result1 = ar.argmin(dim=["y"])
    +        expected1 = {
    +            key: xr.DataArray(value, dims=("x", "z"))
    +            for key, value in minindices_y.items()
    +        }
    +        for key in expected1:
    +            assert_identical(result1[key].drop_vars(["x", "z"]), expected1[key])
    +
    +        result2 = ar.argmin(dim=["z"])
    +        expected2 = {
    +            key: xr.DataArray(value, dims=("x", "y"))
    +            for key, value in minindices_z.items()
    +        }
    +        for key in expected2:
    +            assert_identical(result2[key].drop_vars(["x", "y"]), expected2[key])
    +
    +        result3 = ar.argmin(dim=("x", "y"))
    +        expected3 = {
    +            key: xr.DataArray(value, dims=("z")) for key, value in minindices_xy.items()
    +        }
    +        for key in expected3:
    +            assert_identical(result3[key].drop_vars("z"), expected3[key])
    +
    +        result4 = ar.argmin(dim=("x", "z"))
    +        expected4 = {
    +            key: xr.DataArray(value, dims=("y")) for key, value in minindices_xz.items()
    +        }
    +        for key in expected4:
    +            assert_identical(result4[key].drop_vars("y"), expected4[key])
    +
    +        result5 = ar.argmin(dim=("y", "z"))
    +        expected5 = {
    +            key: xr.DataArray(value, dims=("x")) for key, value in minindices_yz.items()
    +        }
    +        for key in expected5:
    +            assert_identical(result5[key].drop_vars("x"), expected5[key])
    +
    +        result6 = ar.argmin(...)
    +        expected6 = {key: xr.DataArray(value) for key, value in minindices_xyz.items()}
    +        for key in expected6:
    +            assert_identical(result6[key], expected6[key])
    +
    +        minindices_x = {
    +            key: xr.where(
    +                nanindices_x[key] == None,  # noqa: E711
    +                minindices_x[key],
    +                nanindices_x[key],
    +            )
    +            for key in minindices_x
    +        }
    +        expected7 = {
    +            key: xr.DataArray(value, dims=("y", "z"))
    +            for key, value in minindices_x.items()
    +        }
    +
    +        result7 = ar.argmin(dim=["x"], skipna=False)
    +        for key in expected7:
    +            assert_identical(result7[key].drop_vars(["y", "z"]), expected7[key])
    +
    +        minindices_y = {
    +            key: xr.where(
    +                nanindices_y[key] == None,  # noqa: E711
    +                minindices_y[key],
    +                nanindices_y[key],
    +            )
    +            for key in minindices_y
    +        }
    +        expected8 = {
    +            key: xr.DataArray(value, dims=("x", "z"))
    +            for key, value in minindices_y.items()
    +        }
    +
    +        result8 = ar.argmin(dim=["y"], skipna=False)
    +        for key in expected8:
    +            assert_identical(result8[key].drop_vars(["x", "z"]), expected8[key])
    +
    +        minindices_z = {
    +            key: xr.where(
    +                nanindices_z[key] == None,  # noqa: E711
    +                minindices_z[key],
    +                nanindices_z[key],
    +            )
    +            for key in minindices_z
    +        }
    +        expected9 = {
    +            key: xr.DataArray(value, dims=("x", "y"))
    +            for key, value in minindices_z.items()
    +        }
    +
    +        result9 = ar.argmin(dim=["z"], skipna=False)
    +        for key in expected9:
    +            assert_identical(result9[key].drop_vars(["x", "y"]), expected9[key])
    +
    +        minindices_xy = {
    +            key: xr.where(
    +                nanindices_xy[key] == None,  # noqa: E711
    +                minindices_xy[key],
    +                nanindices_xy[key],
    +            )
    +            for key in minindices_xy
    +        }
    +        expected10 = {
    +            key: xr.DataArray(value, dims="z") for key, value in minindices_xy.items()
    +        }
    +
    +        result10 = ar.argmin(dim=("x", "y"), skipna=False)
    +        for key in expected10:
    +            assert_identical(result10[key].drop_vars("z"), expected10[key])
    +
    +        minindices_xz = {
    +            key: xr.where(
    +                nanindices_xz[key] == None,  # noqa: E711
    +                minindices_xz[key],
    +                nanindices_xz[key],
    +            )
    +            for key in minindices_xz
    +        }
    +        expected11 = {
    +            key: xr.DataArray(value, dims="y") for key, value in minindices_xz.items()
    +        }
    +
    +        result11 = ar.argmin(dim=("x", "z"), skipna=False)
    +        for key in expected11:
    +            assert_identical(result11[key].drop_vars("y"), expected11[key])
    +
    +        minindices_yz = {
    +            key: xr.where(
    +                nanindices_yz[key] == None,  # noqa: E711
    +                minindices_yz[key],
    +                nanindices_yz[key],
    +            )
    +            for key in minindices_yz
    +        }
    +        expected12 = {
    +            key: xr.DataArray(value, dims="x") for key, value in minindices_yz.items()
    +        }
    +
    +        result12 = ar.argmin(dim=("y", "z"), skipna=False)
    +        for key in expected12:
    +            assert_identical(result12[key].drop_vars("x"), expected12[key])
    +
    +        minindices_xyz = {
    +            key: xr.where(
    +                nanindices_xyz[key] == None,  # noqa: E711
    +                minindices_xyz[key],
    +                nanindices_xyz[key],
    +            )
    +            for key in minindices_xyz
    +        }
    +        expected13 = {key: xr.DataArray(value) for key, value in minindices_xyz.items()}
    +
    +        result13 = ar.argmin(..., skipna=False)
    +        for key in expected13:
    +            assert_identical(result13[key], expected13[key])
    +
    +    def test_argmax_dim(
    +        self,
    +        x,
    +        minindices_x,
    +        minindices_y,
    +        minindices_z,
    +        minindices_xy,
    +        minindices_xz,
    +        minindices_yz,
    +        minindices_xyz,
    +        maxindices_x,
    +        maxindices_y,
    +        maxindices_z,
    +        maxindices_xy,
    +        maxindices_xz,
    +        maxindices_yz,
    +        maxindices_xyz,
    +        nanindices_x,
    +        nanindices_y,
    +        nanindices_z,
    +        nanindices_xy,
    +        nanindices_xz,
    +        nanindices_yz,
    +        nanindices_xyz,
    +    ):
    +
    +        ar = xr.DataArray(
    +            x,
    +            dims=["x", "y", "z"],
    +            coords={
    +                "x": np.arange(x.shape[0]) * 4,
    +                "y": 1 - np.arange(x.shape[1]),
    +                "z": 2 + 3 * np.arange(x.shape[2]),
    +            },
    +            attrs=self.attrs,
    +        )
    +        xindarr = np.tile(
    +            np.arange(x.shape[0], dtype=np.intp)[:, np.newaxis, np.newaxis],
    +            [1, x.shape[1], x.shape[2]],
    +        )
    +        xindarr = xr.DataArray(xindarr, dims=ar.dims, coords=ar.coords)
    +        yindarr = np.tile(
    +            np.arange(x.shape[1], dtype=np.intp)[np.newaxis, :, np.newaxis],
    +            [x.shape[0], 1, x.shape[2]],
    +        )
    +        yindarr = xr.DataArray(yindarr, dims=ar.dims, coords=ar.coords)
    +        zindarr = np.tile(
    +            np.arange(x.shape[2], dtype=np.intp)[np.newaxis, np.newaxis, :],
    +            [x.shape[0], x.shape[1], 1],
    +        )
    +        zindarr = xr.DataArray(zindarr, dims=ar.dims, coords=ar.coords)
    +
    +        for inds in [
    +            maxindices_x,
    +            maxindices_y,
    +            maxindices_z,
    +            maxindices_xy,
    +            maxindices_xz,
    +            maxindices_yz,
    +            maxindices_xyz,
    +        ]:
    +            if np.array([np.isnan(i) for i in inds.values()]).any():
    +                with pytest.raises(ValueError):
    +                    ar.argmax(dim=[d for d in inds])
    +                return
    +
    +        result0 = ar.argmax(dim=["x"])
    +        expected0 = {
    +            key: xr.DataArray(value, dims=("y", "z"))
    +            for key, value in maxindices_x.items()
    +        }
    +        for key in expected0:
    +            assert_identical(result0[key].drop_vars(["y", "z"]), expected0[key])
    +
    +        result1 = ar.argmax(dim=["y"])
    +        expected1 = {
    +            key: xr.DataArray(value, dims=("x", "z"))
    +            for key, value in maxindices_y.items()
    +        }
    +        for key in expected1:
    +            assert_identical(result1[key].drop_vars(["x", "z"]), expected1[key])
    +
    +        result2 = ar.argmax(dim=["z"])
    +        expected2 = {
    +            key: xr.DataArray(value, dims=("x", "y"))
    +            for key, value in maxindices_z.items()
    +        }
    +        for key in expected2:
    +            assert_identical(result2[key].drop_vars(["x", "y"]), expected2[key])
    +
    +        result3 = ar.argmax(dim=("x", "y"))
    +        expected3 = {
    +            key: xr.DataArray(value, dims=("z")) for key, value in maxindices_xy.items()
    +        }
    +        for key in expected3:
    +            assert_identical(result3[key].drop_vars("z"), expected3[key])
    +
    +        result4 = ar.argmax(dim=("x", "z"))
    +        expected4 = {
    +            key: xr.DataArray(value, dims=("y")) for key, value in maxindices_xz.items()
    +        }
    +        for key in expected4:
    +            assert_identical(result4[key].drop_vars("y"), expected4[key])
    +
    +        result5 = ar.argmax(dim=("y", "z"))
    +        expected5 = {
    +            key: xr.DataArray(value, dims=("x")) for key, value in maxindices_yz.items()
    +        }
    +        for key in expected5:
    +            assert_identical(result5[key].drop_vars("x"), expected5[key])
    +
    +        result6 = ar.argmax(...)
    +        expected6 = {key: xr.DataArray(value) for key, value in maxindices_xyz.items()}
    +        for key in expected6:
    +            assert_identical(result6[key], expected6[key])
    +
    +        maxindices_x = {
    +            key: xr.where(
    +                nanindices_x[key] == None,  # noqa: E711
    +                maxindices_x[key],
    +                nanindices_x[key],
    +            )
    +            for key in maxindices_x
    +        }
    +        expected7 = {
    +            key: xr.DataArray(value, dims=("y", "z"))
    +            for key, value in maxindices_x.items()
    +        }
    +
    +        result7 = ar.argmax(dim=["x"], skipna=False)
    +        for key in expected7:
    +            assert_identical(result7[key].drop_vars(["y", "z"]), expected7[key])
    +
    +        maxindices_y = {
    +            key: xr.where(
    +                nanindices_y[key] == None,  # noqa: E711
    +                maxindices_y[key],
    +                nanindices_y[key],
    +            )
    +            for key in maxindices_y
    +        }
    +        expected8 = {
    +            key: xr.DataArray(value, dims=("x", "z"))
    +            for key, value in maxindices_y.items()
    +        }
    +
    +        result8 = ar.argmax(dim=["y"], skipna=False)
    +        for key in expected8:
    +            assert_identical(result8[key].drop_vars(["x", "z"]), expected8[key])
    +
    +        maxindices_z = {
    +            key: xr.where(
    +                nanindices_z[key] == None,  # noqa: E711
    +                maxindices_z[key],
    +                nanindices_z[key],
    +            )
    +            for key in maxindices_z
    +        }
    +        expected9 = {
    +            key: xr.DataArray(value, dims=("x", "y"))
    +            for key, value in maxindices_z.items()
    +        }
    +
    +        result9 = ar.argmax(dim=["z"], skipna=False)
    +        for key in expected9:
    +            assert_identical(result9[key].drop_vars(["x", "y"]), expected9[key])
    +
    +        maxindices_xy = {
    +            key: xr.where(
    +                nanindices_xy[key] == None,  # noqa: E711
    +                maxindices_xy[key],
    +                nanindices_xy[key],
    +            )
    +            for key in maxindices_xy
    +        }
    +        expected10 = {
    +            key: xr.DataArray(value, dims="z") for key, value in maxindices_xy.items()
    +        }
    +
    +        result10 = ar.argmax(dim=("x", "y"), skipna=False)
    +        for key in expected10:
    +            assert_identical(result10[key].drop_vars("z"), expected10[key])
    +
    +        maxindices_xz = {
    +            key: xr.where(
    +                nanindices_xz[key] == None,  # noqa: E711
    +                maxindices_xz[key],
    +                nanindices_xz[key],
    +            )
    +            for key in maxindices_xz
    +        }
    +        expected11 = {
    +            key: xr.DataArray(value, dims="y") for key, value in maxindices_xz.items()
    +        }
    +
    +        result11 = ar.argmax(dim=("x", "z"), skipna=False)
    +        for key in expected11:
    +            assert_identical(result11[key].drop_vars("y"), expected11[key])
    +
    +        maxindices_yz = {
    +            key: xr.where(
    +                nanindices_yz[key] == None,  # noqa: E711
    +                maxindices_yz[key],
    +                nanindices_yz[key],
    +            )
    +            for key in maxindices_yz
    +        }
    +        expected12 = {
    +            key: xr.DataArray(value, dims="x") for key, value in maxindices_yz.items()
    +        }
    +
    +        result12 = ar.argmax(dim=("y", "z"), skipna=False)
    +        for key in expected12:
    +            assert_identical(result12[key].drop_vars("x"), expected12[key])
    +
    +        maxindices_xyz = {
    +            key: xr.where(
    +                nanindices_xyz[key] == None,  # noqa: E711
    +                maxindices_xyz[key],
    +                nanindices_xyz[key],
    +            )
    +            for key in maxindices_xyz
    +        }
    +        expected13 = {key: xr.DataArray(value) for key, value in maxindices_xyz.items()}
    +
    +        result13 = ar.argmax(..., skipna=False)
    +        for key in expected13:
    +            assert_identical(result13[key], expected13[key])
    +
     
     class TestReduceND(TestReduce):
         @pytest.mark.parametrize("op", ["idxmin", "idxmax"])
    diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
    index 9c8d40724da..0c4082a553e 100644
    --- a/xarray/tests/test_dataset.py
    +++ b/xarray/tests/test_dataset.py
    @@ -4597,6 +4597,9 @@ def test_reduce_non_numeric(self):
             assert_equal(data1.mean(), data2.mean())
             assert_equal(data1.mean(dim="dim1"), data2.mean(dim="dim1"))
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Once the behaviour of DataArray:DeprecationWarning"
    +    )
         def test_reduce_strings(self):
             expected = Dataset({"x": "a"})
             ds = Dataset({"x": ("y", ["a", "b"])})
    @@ -4668,6 +4671,9 @@ def test_reduce_keep_attrs(self):
             for k, v in ds.data_vars.items():
                 assert v.attrs == data[k].attrs
     
    +    @pytest.mark.filterwarnings(
    +        "ignore:Once the behaviour of DataArray:DeprecationWarning"
    +    )
         def test_reduce_argmin(self):
             # regression test for #205
             ds = Dataset({"a": ("x", [0, 1])})
    diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py
    index fb9063ca49e..20a5f0e8613 100644
    --- a/xarray/tests/test_units.py
    +++ b/xarray/tests/test_units.py
    @@ -297,19 +297,29 @@ def __call__(self, obj, *args, **kwargs):
             all_args = merge_args(self.args, args)
             all_kwargs = {**self.kwargs, **kwargs}
     
    +        xarray_classes = (
    +            xr.Variable,
    +            xr.DataArray,
    +            xr.Dataset,
    +            xr.core.groupby.GroupBy,
    +        )
    +
    +        if not isinstance(obj, xarray_classes):
    +            # remove typical xarray args like "dim"
    +            exclude_kwargs = ("dim", "dims")
    +            all_kwargs = {
    +                key: value
    +                for key, value in all_kwargs.items()
    +                if key not in exclude_kwargs
    +            }
    +
             func = getattr(obj, self.name, None)
    +
             if func is None or not isinstance(func, Callable):
                 # fall back to module level numpy functions if not a xarray object
                 if not isinstance(obj, (xr.Variable, xr.DataArray, xr.Dataset)):
                     numpy_func = getattr(np, self.name)
                     func = partial(numpy_func, obj)
    -                # remove typical xarray args like "dim"
    -                exclude_kwargs = ("dim", "dims")
    -                all_kwargs = {
    -                    key: value
    -                    for key, value in all_kwargs.items()
    -                    if key not in exclude_kwargs
    -                }
                 else:
                     raise AttributeError(f"{obj} has no method named '{self.name}'")
     
    @@ -1408,8 +1418,8 @@ def test_real_and_imag(self):
             (
                 method("all"),
                 method("any"),
    -            method("argmax"),
    -            method("argmin"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("argsort"),
                 method("cumprod"),
                 method("cumsum"),
    @@ -1433,7 +1443,11 @@ def test_aggregation(self, func, dtype):
             )
             variable = xr.Variable("x", array)
     
    -        units = extract_units(func(array))
    +        numpy_kwargs = func.kwargs.copy()
    +        if "dim" in func.kwargs:
    +            numpy_kwargs["axis"] = variable.get_axis_num(numpy_kwargs.pop("dim"))
    +
    +        units = extract_units(func(array, **numpy_kwargs))
             expected = attach_units(func(strip_units(variable)), units)
             actual = func(variable)
     
    @@ -2243,8 +2257,20 @@ def test_repr(self, func, variant, dtype):
             (
                 function("all"),
                 function("any"),
    -            function("argmax"),
    -            function("argmin"),
    +            pytest.param(
    +                function("argmax"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmax as a function on xarray objects is not "
    +                    "supported"
    +                ),
    +            ),
    +            pytest.param(
    +                function("argmin"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmin as a function on xarray objects is not "
    +                    "supported"
    +                ),
    +            ),
                 function("max"),
                 function("mean"),
                 pytest.param(
    @@ -2265,8 +2291,8 @@ def test_repr(self, func, variant, dtype):
                 function("cumprod"),
                 method("all"),
                 method("any"),
    -            method("argmax"),
    -            method("argmin"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("max"),
                 method("mean"),
                 method("median"),
    @@ -2289,6 +2315,10 @@ def test_aggregation(self, func, dtype):
             )
             data_array = xr.DataArray(data=array, dims="x")
     
    +        numpy_kwargs = func.kwargs.copy()
    +        if "dim" in numpy_kwargs:
    +            numpy_kwargs["axis"] = data_array.get_axis_num(numpy_kwargs.pop("dim"))
    +
             # units differ based on the applied function, so we need to
             # first compute the units
             units = extract_units(func(array))
    @@ -3803,8 +3833,20 @@ def test_repr(self, func, variant, dtype):
             (
                 function("all"),
                 function("any"),
    -            function("argmax"),
    -            function("argmin"),
    +            pytest.param(
    +                function("argmax"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmax as a function on xarray objects is not "
    +                    "supported"
    +                ),
    +            ),
    +            pytest.param(
    +                function("argmin"),
    +                marks=pytest.mark.skip(
    +                    reason="calling np.argmin as a function on xarray objects is not "
    +                    "supported"
    +                ),
    +            ),
                 function("max"),
                 function("min"),
                 function("mean"),
    @@ -3823,8 +3865,8 @@ def test_repr(self, func, variant, dtype):
                 function("cumprod"),
                 method("all"),
                 method("any"),
    -            method("argmax"),
    -            method("argmin"),
    +            method("argmax", dim="x"),
    +            method("argmin", dim="x"),
                 method("max"),
                 method("min"),
                 method("mean"),
    @@ -3853,8 +3895,23 @@ def test_aggregation(self, func, dtype):
     
             ds = xr.Dataset({"a": ("x", a), "b": ("x", b)})
     
    -        units_a = array_extract_units(func(a))
    -        units_b = array_extract_units(func(b))
    +        if "dim" in func.kwargs:
    +            numpy_kwargs = func.kwargs.copy()
    +            dim = numpy_kwargs.pop("dim")
    +
    +            axis_a = ds.a.get_axis_num(dim)
    +            axis_b = ds.b.get_axis_num(dim)
    +
    +            numpy_kwargs_a = numpy_kwargs.copy()
    +            numpy_kwargs_a["axis"] = axis_a
    +            numpy_kwargs_b = numpy_kwargs.copy()
    +            numpy_kwargs_b["axis"] = axis_b
    +        else:
    +            numpy_kwargs_a = {}
    +            numpy_kwargs_b = {}
    +
    +        units_a = array_extract_units(func(a, **numpy_kwargs_a))
    +        units_b = array_extract_units(func(b, **numpy_kwargs_b))
             units = {"a": units_a, "b": units_b}
     
             actual = func(ds)
    diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
    index 3003e0d66f3..d79d40d67c0 100644
    --- a/xarray/tests/test_variable.py
    +++ b/xarray/tests/test_variable.py
    @@ -1657,7 +1657,7 @@ def test_reduce_funcs(self):
             assert_identical(v.all(dim="x"), Variable([], False))
     
             v = Variable("t", pd.date_range("2000-01-01", periods=3))
    -        assert v.argmax(skipna=True) == 2
    +        assert v.argmax(skipna=True, dim="t") == 2
     
             assert_identical(v.max(), Variable([], pd.Timestamp("2000-01-03")))
     
    
    From 13be3e08a5810bb77c10f90330bcbb55888c362b Mon Sep 17 00:00:00 2001
    From: Wei Ji 
    Date: Tue, 30 Jun 2020 10:46:03 +1200
    Subject: [PATCH 69/71] Ensure class functions have necessary variables
    
     Some class methods were missing 'self' and other keyword-argument variables. Also linted using black.
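     
     A compressed illustration of the error class fixed here (a hypothetical store class, not
     the actual ZarrStore code):
     
         class Store:
             def get_chunk(name, var, chunks):  # missing `self`
                 return chunks
     
         # Store().get_chunk("a", None, {})  # TypeError: too many positional arguments
     
         class FixedStore:
             def get_chunk(self, name, var, chunks):
                 return chunks
     
         FixedStore().get_chunk("a", None, {})  # works as an instance method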
    ---
     xarray/backends/api.py  | 28 ++++++++++++++++------------
     xarray/backends/zarr.py | 11 ++++++-----
     2 files changed, 22 insertions(+), 17 deletions(-)
    
    diff --git a/xarray/backends/api.py b/xarray/backends/api.py
    index 7e81870d653..aca9f003bf9 100644
    --- a/xarray/backends/api.py
    +++ b/xarray/backends/api.py
    @@ -471,14 +471,14 @@ def maybe_decode_store(store, lock=False):
             store = filename_or_obj
     
         if isinstance(filename_or_obj, MutableMapping):
    -        if engine == 'zarr':
    +        if engine == "zarr":
                 # on ZarrStore, mode='r', synchronizer=None, group=None,
                 # consolidated=False.
    -            overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None)
    +            overwrite_encoded_chunks = backend_kwargs.pop(
    +                "overwrite_encoded_chunks", None
    +            )
                 store = backends.ZarrStore.open_group(
    -                filename_or_obj,
    -                group=group,
    -                **backend_kwargs
    +                filename_or_obj, group=group, **backend_kwargs
                 )
     
         elif isinstance(filename_or_obj, str):
    @@ -508,14 +508,14 @@ def maybe_decode_store(store, lock=False):
                 store = backends.CfGribDataStore(
                     filename_or_obj, lock=lock, **backend_kwargs
                 )
    -        elif engine == 'zarr':
    +        elif engine == "zarr":
                 # on ZarrStore, mode='r', synchronizer=None, group=None,
                 # consolidated=False.
    -            overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None)
    +            overwrite_encoded_chunks = backend_kwargs.pop(
    +                "overwrite_encoded_chunks", None
    +            )
                 store = backends.ZarrStore.open_group(
    -                filename_or_obj,
    -                group=group,
    -                **backend_kwargs
    +                filename_or_obj, group=group, **backend_kwargs
                 )
         else:
             if engine not in [None, "scipy", "h5netcdf"]:
    @@ -541,7 +541,8 @@ def maybe_decode_store(store, lock=False):
     
         if chunks is not None:
             from dask.base import tokenize
    -        if engine != 'zarr':
    +
    +        if engine != "zarr":
     
                 # if passed an actual file path, augment the token with
                 # the file modification time
    @@ -579,7 +580,10 @@ def maybe_decode_store(store, lock=False):
                 if isinstance(chunks, int):
                     chunks = dict.fromkeys(ds.dims, chunks)
     
    -            variables = {k: backends.ZarrStore.open_group.maybe_chunk(k, v, chunks) for k, v in ds.variables.items()}
    +            variables = {
    +                k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks)
    +                for k, v in ds.variables.items()
    +            }
                 ds2 = ds._replace_vars_and_dims(variables)
             return ds2
         else:
    diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py
    index c6e4d1b362a..7a30c04e6b4 100644
    --- a/xarray/backends/zarr.py
    +++ b/xarray/backends/zarr.py
    @@ -358,8 +358,7 @@ def encode_variable(self, variable):
         def encode_attribute(self, a):
             return encode_zarr_attr_value(a)
     
    -
    -    def get_chunk(name, var, chunks):
    +    def get_chunk(self, name, var, chunks):
             chunk_spec = dict(zip(var.dims, var.encoding.get("chunks")))
     
             # Coordinate labels aren't chunked
    @@ -388,15 +387,17 @@ def get_chunk(name, var, chunks):
                     chunk_spec[dim] = chunks[dim]
             return chunk_spec
     
    -    def maybe_chunk(name, var, chunks):
    -        chunk_spec = get_chunk(name, var, chunks)
    +    def maybe_chunk(self, name, var, chunks, overwrite_encoded_chunks):
    +        chunk_spec = self.get_chunk(name, var, chunks)
     
             if (var.ndim > 0) and (chunk_spec is not None):
    +            from dask.base import tokenize
    +
                 # does this cause any data to be read?
                 token2 = tokenize(name, var._data)
                 name2 = "zarr-%s" % token2
                 var = var.chunk(chunk_spec, name=name2, lock=None)
    -            if overwrite_encoded_chunks and var.chunks is not None:
     +            if overwrite_encoded_chunks and var.chunks is not None:
                     var.encoding["chunks"] = tuple(x[0] for x in var.chunk)
             return var
     
    
    From afbcf787f6ca736662d4d95285f88b2cee03185d Mon Sep 17 00:00:00 2001
    From: Wei Ji 
    Date: Tue, 30 Jun 2020 11:11:39 +1200
    Subject: [PATCH 70/71] Combine MutableMapping and Zarr engine condition
    
    As per https://github.com/pydata/xarray/pull/4003#discussion_r441978720.
    ---
     xarray/backends/api.py | 17 +++++++----------
     1 file changed, 7 insertions(+), 10 deletions(-)
    
    diff --git a/xarray/backends/api.py b/xarray/backends/api.py
    index 78fb9e213af..154cfa6acda 100644
    --- a/xarray/backends/api.py
    +++ b/xarray/backends/api.py
    @@ -476,16 +476,13 @@ def maybe_decode_store(store, lock=False):
         if isinstance(filename_or_obj, AbstractDataStore):
             store = filename_or_obj
     
    -    if isinstance(filename_or_obj, MutableMapping):
    -        if engine == "zarr":
    -            # on ZarrStore, mode='r', synchronizer=None, group=None,
    -            # consolidated=False.
    -            overwrite_encoded_chunks = backend_kwargs.pop(
    -                "overwrite_encoded_chunks", None
    -            )
    -            store = backends.ZarrStore.open_group(
    -                filename_or_obj, group=group, **backend_kwargs
    -            )
    +    if isinstance(filename_or_obj, MutableMapping) and engine == "zarr":
    +        # on ZarrStore, mode='r', synchronizer=None, group=None,
    +        # consolidated=False.
    +        overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None)
    +        store = backends.ZarrStore.open_group(
    +            filename_or_obj, group=group, **backend_kwargs
    +        )
     
         elif isinstance(filename_or_obj, str):
             filename_or_obj = _normalize_path(filename_or_obj)
    
    From cba93c333ff3c8965976544d7b6ab9ac41335fca Mon Sep 17 00:00:00 2001
    From: Wei Ji 
    Date: Tue, 30 Jun 2020 11:51:20 +1200
    Subject: [PATCH 71/71] Pop out overwrite_encoded_chunks after shallow copy
     backend_kwargs dict
    
     As per https://github.com/pydata/xarray/pull/4003#discussion_r441979810, don't pop from the caller's backend_kwargs dict; make a shallow copy of the dictionary first. Also removed `overwrite_encoded_chunks` as a top-level kwarg of `open_dataset`. Instead, pass it via `backend_kwargs` when using engine="zarr".
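     
     A minimal sketch of the copy-then-pop pattern described above (the dictionary contents are
     illustrative only):
     
         backend_kwargs = {"consolidated": True, "overwrite_encoded_chunks": True}
     
         # Shallow-copy so the caller's dict is left untouched, then pop the zarr-only
         # option before forwarding the remaining kwargs to ZarrStore.open_group.
         _backend_kwargs = backend_kwargs.copy()
         overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None)
     
         assert "overwrite_encoded_chunks" in backend_kwargs  # original dict unchanged
         assert "overwrite_encoded_chunks" not in _backend_kwargs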
    ---
     xarray/backends/api.py | 13 ++++++-------
     1 file changed, 6 insertions(+), 7 deletions(-)
    
    diff --git a/xarray/backends/api.py b/xarray/backends/api.py
    index 154cfa6acda..610731568df 100644
    --- a/xarray/backends/api.py
    +++ b/xarray/backends/api.py
    @@ -388,9 +388,6 @@ def open_dataset(
             {'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}
             into timedelta objects. If False, leave them encoded as numbers.
             If None (default), assume the same value of decode_time.
    -    overwrite_encoded_chunks: bool, optional
    -        Whether to drop the zarr chunks encoded for each variable when a
    -        dataset is loaded with specified chunk sizes (default: False)
     
     
         Returns
    @@ -479,9 +476,10 @@ def maybe_decode_store(store, lock=False):
         if isinstance(filename_or_obj, MutableMapping) and engine == "zarr":
             # on ZarrStore, mode='r', synchronizer=None, group=None,
             # consolidated=False.
    -        overwrite_encoded_chunks = backend_kwargs.pop("overwrite_encoded_chunks", None)
    +        _backend_kwargs = backend_kwargs.copy()
    +        overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None)
             store = backends.ZarrStore.open_group(
    -            filename_or_obj, group=group, **backend_kwargs
    +            filename_or_obj, group=group, **_backend_kwargs
             )
     
         elif isinstance(filename_or_obj, str):
    @@ -514,11 +512,12 @@ def maybe_decode_store(store, lock=False):
             elif engine == "zarr":
                 # on ZarrStore, mode='r', synchronizer=None, group=None,
                 # consolidated=False.
    -            overwrite_encoded_chunks = backend_kwargs.pop(
    +            _backend_kwargs = backend_kwargs.copy()
    +            overwrite_encoded_chunks = _backend_kwargs.pop(
                     "overwrite_encoded_chunks", None
                 )
                 store = backends.ZarrStore.open_group(
    -                filename_or_obj, group=group, **backend_kwargs
    +                filename_or_obj, group=group, **_backend_kwargs
                 )
         else:
             if engine not in [None, "scipy", "h5netcdf"]: