From b35de8e42594518818e70c8bba574afb66efd6f7 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Thu, 10 Dec 2020 10:42:20 -0500 Subject: [PATCH 01/24] Added test for combine_by_coords changes. --- xarray/tests/test_combine.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 86c9615148e..9c12e91e18c 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -667,6 +667,13 @@ def test_combine_by_coords(self): def test_empty_input(self): assert_identical(Dataset(), combine_by_coords([])) + def test_combine_coords_unnamed_arrays(self): + objs = [DataArray([0, 1], dims=('x'), coords=({'x': [0, 1]})), + DataArray([2, 3], dims=('x'), coords=({'x': [2, 3]}))] + expected = DataArray([0, 1, 2, 3], dims=('x'), coords=({'x': [0, 1, 2, 3]})) + actual = combine_by_coords(objs) + assert_identical(expected, actual) + @pytest.mark.parametrize( "join, expected", [ From f966e7696d22f95f267cf43a36a6a258877ae7d3 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Fri, 11 Dec 2020 16:54:42 -0500 Subject: [PATCH 02/24] Modified test case to expect a dataset instead of a DataArray. Added converter to combine_by_coords to check for all DataArray case and convert to datasets. --- xarray/core/combine.py | 21 +++++++++++++++++++-- xarray/tests/test_combine.py | 4 ++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 38a32758f66..0b7930e9aa4 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -541,8 +541,15 @@ def vars_as_keys(ds): return tuple(sorted(ds)) +def all_unnamed_data_arrays(data_objects): + for data_obj in data_objects: + if not isinstance(data_obj, DataArray) or data_obj.name is not None: + return False + return True + + def combine_by_coords( - datasets, + data_objects, compat="no_conflicts", data_vars="all", coords="different", @@ -576,8 +583,11 @@ def combine_by_coords( Parameters ---------- - datasets : sequence of xarray.Dataset + data_objects : sequence of xarray.Dataset or sequence of xarray.DataArray Dataset objects to combine. + + This may also be a sequence of xarray.DataArray without variable names. + If so, these arrays are assumed to belong to the same variable. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional String indicating how to compare variables of the same name for potential conflicts: @@ -750,6 +760,13 @@ def combine_by_coords( precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176 """ + # If a set of unnamed data arrays is provided, these arrays are assumed to belong + # to the same variable and should be combined. + if all_unnamed_data_arrays(data_objects): + datasets = [Dataset({'_': data_array}) for data_array in data_objects] + else: + datasets = data_objects + # Group by data vars sorted_datasets = sorted(datasets, key=vars_as_keys) grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 9c12e91e18c..20649bd1a53 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -670,10 +670,10 @@ def test_empty_input(self): def test_combine_coords_unnamed_arrays(self): objs = [DataArray([0, 1], dims=('x'), coords=({'x': [0, 1]})), DataArray([2, 3], dims=('x'), coords=({'x': [2, 3]}))] - expected = DataArray([0, 1, 2, 3], dims=('x'), coords=({'x': [0, 1, 2, 3]})) + expected = Dataset({'_': ('x', [0, 1, 2, 3])}, coords={'x': [0, 1, 2, 3]}) actual = combine_by_coords(objs) assert_identical(expected, actual) - + @pytest.mark.parametrize( "join, expected", [ From 68b7b498829d8440497ee440360beec16aff49b7 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Sat, 12 Dec 2020 08:15:30 -0500 Subject: [PATCH 03/24] Added tests to check combine_by_coords for exception with mixed DataArrays and dataset input and with empty list. --- xarray/tests/test_combine.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 20649bd1a53..0bb8f57393c 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -668,12 +668,27 @@ def test_empty_input(self): assert_identical(Dataset(), combine_by_coords([])) def test_combine_coords_unnamed_arrays(self): - objs = [DataArray([0, 1], dims=('x'), coords=({'x': [0, 1]})), - DataArray([2, 3], dims=('x'), coords=({'x': [2, 3]}))] - expected = Dataset({'_': ('x', [0, 1, 2, 3])}, coords={'x': [0, 1, 2, 3]}) + objs = [ + DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), + DataArray([2, 3], dims=("x"), coords=({"x": [2, 3]})), + ] + expected = Dataset({"_": ("x", [0, 1, 2, 3])}, coords={"x": [0, 1, 2, 3]}) actual = combine_by_coords(objs) assert_identical(expected, actual) + def test_combine_coords_mixed_datasets_arrays(self): + objs = [ + DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), + Dataset({"x": [2, 3]}) + ] + with raises_regex(ValueError, "without providing an explicit name"): + combine_by_coords(objs) + + def test_combine_coords_empty_list(self): + expected = Dataset() + actual = combine_by_coords([]) + assert_identical(expected, actual) + @pytest.mark.parametrize( "join, expected", [ From 540961fdbee83bd6b430b1b600c3efaaa859abce Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Tue, 15 Dec 2020 09:52:21 -0500 Subject: [PATCH 04/24] Formatting changes after running black --- xarray/core/combine.py | 2 +- xarray/core/computation.py | 1 + xarray/core/dataset.py | 1 - xarray/core/options.py | 1 + xarray/core/parallel.py | 1 + xarray/tests/test_combine.py | 2 +- 6 files changed, 5 insertions(+), 3 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 0b7930e9aa4..0fe9f1dc2ab 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -763,7 +763,7 @@ def combine_by_coords( # If a set of unnamed data arrays is provided, these arrays are assumed to belong # to the same variable and should be combined. if all_unnamed_data_arrays(data_objects): - datasets = [Dataset({'_': data_array}) for data_array in data_objects] + datasets = [Dataset({"_": data_array}) for data_array in data_objects] else: datasets = data_objects diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 9251edf1cb8..c332ecb0d00 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -931,6 +931,7 @@ def apply_ufunc( >>> def magnitude(a, b): ... func = lambda x, y: np.sqrt(x ** 2 + y ** 2) ... return xr.apply_ufunc(func, a, b) + ... You can now apply ``magnitude()`` to ``xr.DataArray`` and ``xr.Dataset`` objects, with automatically preserved dimensions and coordinates, e.g., diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b624f278c20..40288df841c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5940,7 +5940,6 @@ def filter_by_attrs(self, **kwargs): Examples -------- >>> # Create an example dataset: - ... >>> import numpy as np >>> import pandas as pd >>> import xarray as xr diff --git a/xarray/core/options.py b/xarray/core/options.py index a14473c9b97..07eddb49960 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -111,6 +111,7 @@ class set_options: >>> ds = xr.Dataset({"x": np.arange(1000)}) >>> with xr.set_options(display_width=40): ... print(ds) + ... Dimensions: (x: 1000) Coordinates: diff --git a/xarray/core/parallel.py b/xarray/core/parallel.py index f3a75034058..20b4b9f9eb3 100644 --- a/xarray/core/parallel.py +++ b/xarray/core/parallel.py @@ -232,6 +232,7 @@ def map_blocks( ... gb = da.groupby(groupby_type) ... clim = gb.mean(dim="time") ... return gb - clim + ... >>> time = xr.cftime_range("1990-01", "1992-01", freq="M") >>> month = xr.DataArray(time.month, coords={"time": time}, dims=["time"]) >>> np.random.seed(123) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 0bb8f57393c..d08dcdce57c 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -679,7 +679,7 @@ def test_combine_coords_unnamed_arrays(self): def test_combine_coords_mixed_datasets_arrays(self): objs = [ DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), - Dataset({"x": [2, 3]}) + Dataset({"x": [2, 3]}), ] with raises_regex(ValueError, "without providing an explicit name"): combine_by_coords(objs) From 1c9b4c2fbbf0ac7540207e3d2aa4ffb127b93f51 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Tue, 15 Dec 2020 12:40:03 -0500 Subject: [PATCH 05/24] Added underscore to helper function to label as private. --- xarray/core/combine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 0fe9f1dc2ab..e81e29dda33 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -541,7 +541,7 @@ def vars_as_keys(ds): return tuple(sorted(ds)) -def all_unnamed_data_arrays(data_objects): +def _all_unnamed_data_arrays(data_objects): for data_obj in data_objects: if not isinstance(data_obj, DataArray) or data_obj.name is not None: return False @@ -762,7 +762,7 @@ def combine_by_coords( # If a set of unnamed data arrays is provided, these arrays are assumed to belong # to the same variable and should be combined. - if all_unnamed_data_arrays(data_objects): + if _all_unnamed_data_arrays(data_objects): datasets = [Dataset({"_": data_array}) for data_array in data_objects] else: datasets = data_objects From cb5ed5ebeb0625984e68377bca1cffb8b070e81a Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Tue, 15 Dec 2020 13:00:38 -0500 Subject: [PATCH 06/24] Black formatting changes for whats-new doc file. --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 13dad17e2c4..65c06783dcc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,6 +32,8 @@ Bug fixes ~~~~~~~~~ - :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`). +- :py:func:`combine_by_coords` now accepts a list of unnamed ``DataArray`` as input (:issue:`3248`). + By `Augustus Ijams `_. Documentation ~~~~~~~~~~~~~ From 77020c0dff97a2e2cf826a8703d45ca6e2a53078 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Thu, 17 Dec 2020 09:03:42 -0500 Subject: [PATCH 07/24] Removed imports in docstring that were automatically added by code styling tools to match the other docstrings. --- xarray/core/dataset.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 40288df841c..d6660521acb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5940,9 +5940,6 @@ def filter_by_attrs(self, **kwargs): Examples -------- >>> # Create an example dataset: - >>> import numpy as np - >>> import pandas as pd - >>> import xarray as xr >>> temp = 15 + 8 * np.random.randn(2, 2, 3) >>> precip = 10 * np.random.rand(2, 2, 3) >>> lon = [[-99.83, -99.32], [-99.79, -99.23]] From 61908397fb3f08d38918313bb0d8b8b9892fc92a Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Sat, 19 Dec 2020 12:06:29 -0500 Subject: [PATCH 08/24] Removed duplicate new item line in whats-new. --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4acea307eb1..93d39df8645 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -33,7 +33,6 @@ Bug fixes - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` do not trigger computations anymore if :py:meth:`Dataset.weighted` or :py:meth:`DataArray.weighted` are applied (:issue:`4625`, :pull:`4668`). By `Julius Busecke `_. - :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`). -- :py:func:`combine_by_coords` now accepts a list of unnamed ``DataArray`` as input (:issue:`3248`). - :py:func:`combine_by_coords` now accepts a list of unnamed ``DataArray`` as input (:issue:`3248`, :pull:`4696`). By `Augustus Ijams `_. - :py:meth:`DataArray.astype`, :py:meth:`Dataset.astype` and :py:meth:`Variable.astype` support From cbc002ff0701275b405877afeea60edd44a5b2d4 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Thu, 15 Apr 2021 10:02:36 -0400 Subject: [PATCH 09/24] combine methods now accept unnamed DataArrays as input. --- xarray/core/combine.py | 121 +++++++++++++++++++++++++---------- xarray/tests/test_combine.py | 34 +++++++++- 2 files changed, 119 insertions(+), 36 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 375931e1f9c..4524094c0a2 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -557,6 +557,64 @@ def combine_nested( def vars_as_keys(ds): return tuple(sorted(ds)) +def _combine_single_variable_hypercube( + datasets, + fill_value=dtypes.NA, + data_vars="all", + coords="different", + compat="no_conflicts", + join="outer", + combine_attrs="no_conflicts" +): + """ + Attempt to combine a list of Datasets into a hypercube using their + coordinates. + + All provided Datasets must belong to a single variable, ie. must be + assigned the same variable name. This precondition is not checked by this + function, so the caller is assumed to know what it's doing. + + This function is NOT part of the public API. + """ + if len(datasets) == 0: + raise ValueError( + "At least one Dataset is required to resolve variable names " + "for combined hypercube.") + + combined_ids, concat_dims = _infer_concat_order_from_coords( + list(datasets) + ) + + if fill_value is None: + # check that datasets form complete hypercube + _check_shape_tile_ids(combined_ids) + else: + # check only that all datasets have same dimension depth for these + # vars + _check_dimension_depth_tile_ids(combined_ids) + + # Concatenate along all of concat_dims one by one to create single ds + concatenated = _combine_nd( + combined_ids, + concat_dims=concat_dims, + data_vars=data_vars, + coords=coords, + compat=compat, + fill_value=fill_value, + join=join, + combine_attrs=combine_attrs, + ) + + # Check the overall coordinates are monotonically increasing + for dim in concat_dims: + indexes = concatenated.indexes.get(dim) + if not (indexes.is_monotonic_increasing or indexes.is_monotonic_decreasing): + raise ValueError( + "Resulting object does not have monotonic" + " global indexes along dimension {}".format(dim) + ) + + return concatenated def combine_by_coords( datasets, @@ -768,48 +826,43 @@ def combine_by_coords( temperature (y, x) float64 10.98 14.3 12.06 nan ... 18.89 10.44 8.293 precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176 """ + if not datasets: + return Dataset() - # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) - - # Perform the multidimensional combine on each group of data variables - # before merging back together - concatenated_grouped_by_data_vars = [] - for vars, datasets_with_same_vars in grouped_by_vars: - combined_ids, concat_dims = _infer_concat_order_from_coords( - list(datasets_with_same_vars) - ) - - if fill_value is None: - # check that datasets form complete hypercube - _check_shape_tile_ids(combined_ids) - else: - # check only that all datasets have same dimension depth for these - # vars - _check_dimension_depth_tile_ids(combined_ids) + if all(isinstance(data_object, DataArray) and data_object.name is None for data_object in datasets): + unnamed_arrays = datasets + temp_datasets = [data_array._to_temp_dataset() for data_array in unnamed_arrays] - # Concatenate along all of concat_dims one by one to create single ds - concatenated = _combine_nd( - combined_ids, - concat_dims=concat_dims, + combined_temp_dataset = _combine_single_variable_hypercube( + temp_datasets, + fill_value=fill_value, data_vars=data_vars, coords=coords, compat=compat, - fill_value=fill_value, join=join, - combine_attrs=combine_attrs, + combine_attrs=combine_attrs ) + return DataArray()._from_temp_dataset(combined_temp_dataset) - # Check the overall coordinates are monotonically increasing - for dim in concat_dims: - indexes = concatenated.indexes.get(dim) - if not (indexes.is_monotonic_increasing or indexes.is_monotonic_decreasing): - raise ValueError( - "Resulting object does not have monotonic" - " global indexes along dimension {}".format(dim) - ) - concatenated_grouped_by_data_vars.append(concatenated) + else: + # Group by data vars + sorted_datasets = sorted(datasets, key=vars_as_keys) + grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) + + # Perform the multidimensional combine on each group of data variables + # before merging back together + concatenated_grouped_by_data_vars = [] + for vars, datasets_with_same_vars in grouped_by_vars: + concatenated = _combine_single_variable_hypercube( + list(datasets_with_same_vars), + fill_value=fill_value, + data_vars=data_vars, + coords=coords, + compat=compat, + join=join, + combine_attrs=combine_attrs + ) + concatenated_grouped_by_data_vars.append(concatenated) return merge( concatenated_grouped_by_data_vars, diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 7664e5e56db..8c6892b6c97 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -627,6 +627,19 @@ def test_combine_nested_fill_value(self, fill_value): actual = combine_nested(datasets, concat_dim="t", fill_value=fill_value) assert_identical(expected, actual) + def test_combine_nested_unnamed_data_arrays(self): + unnamed_array = DataArray(data=[1.0, 2.0], coords={'x': [0, 1]}, dims='x') + + actual = combine_nested([unnamed_array], concat_dim='x') + expected = unnamed_array + assert_identical(expected, actual) + + unnamed_array1 = DataArray(data=[1.0, 2.0], coords={'x': [0, 1]}, dims='x') + unnamed_array2 = DataArray(data=[3.0, 4.0], coords={'x': [2, 3]}, dims='x') + + actual = combine_nested([unnamed_array1, unnamed_array2], concat_dim='x') + expected = DataArray(data=[1.0, 2.0, 3.0, 4.0], coords={'x': [0, 1, 2, 3]}, dims='x') + assert_identical(expected, actual) class TestCombineAuto: def test_combine_by_coords(self): @@ -665,8 +678,8 @@ def test_combine_by_coords(self): with raises_regex(ValueError, "Every dimension needs a coordinate"): combine_by_coords(objs) - def test_empty_input(self): - assert_identical(Dataset(), combine_by_coords([])) + def test_empty_input(self): + assert_identical(Dataset(), combine_by_coords([])) @pytest.mark.parametrize( "join, expected", @@ -830,6 +843,23 @@ def test_combine_by_coords_incomplete_hypercube(self): with pytest.raises(ValueError): combine_by_coords([x1, x2, x3], fill_value=None) + def test_combine_by_coords_unnamed_arrays(self): + unnamed_array = DataArray(data=[1.0, 2.0], coords={'x': [0, 1]}, dims='x') + + actual = combine_by_coords([unnamed_array]) + expected = unnamed_array + assert_identical(expected, actual) + + unnamed_array1 = DataArray(data=[1.0, 2.0], coords={'x': [0, 1]}, dims='x') + unnamed_array2 = DataArray(data=[3.0, 4.0], coords={'x': [2, 3]}, dims='x') + + actual = combine_by_coords([unnamed_array1, unnamed_array2]) + expected = DataArray( + data=[1.0, 2.0, 3.0, 4.0], + coords={'x': [0, 1, 2, 3]}, + dims='x' + ) + assert_identical(expected, actual) @requires_cftime def test_combine_by_coords_distant_cftime_dates(): From 89ac96268d84eddac146a1eb5394845aa784fb40 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Thu, 15 Apr 2021 12:23:05 -0400 Subject: [PATCH 10/24] combine nested test checks nested lists of unnamed DataArrays. --- xarray/tests/test_combine.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 8c6892b6c97..7dbd79d1bad 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -641,6 +641,16 @@ def test_combine_nested_unnamed_data_arrays(self): expected = DataArray(data=[1.0, 2.0, 3.0, 4.0], coords={'x': [0, 1, 2, 3]}, dims='x') assert_identical(expected, actual) + da1 = DataArray(data=[[0.0]], coords={'x':[0], 'y':[0]}, dims=['x','y']) + da2 = DataArray(data=[[1.0]], coords={'x':[0], 'y':[1]}, dims=['x','y']) + da3 = DataArray(data=[[2.0]], coords={'x':[1], 'y':[0]}, dims=['x','y']) + da4 = DataArray(data=[[3.0]], coords={'x':[1], 'y':[1]}, dims=['x','y']) + objs = [[da1, da2], [da3, da4]] + + expected = DataArray(data=[[0.0, 1.0], [2.0, 3.0]], coords={'x': [0, 1], 'y': [0, 1]}, dims=['x', 'y']) + actual = combine_nested(objs, concat_dim=['x', 'y']) + assert_identical(expected, actual) + class TestCombineAuto: def test_combine_by_coords(self): objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] From 5f3afa50a73b55e2bf31e5ed8b750f9a5f4beb4f Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Thu, 15 Apr 2021 13:05:15 -0400 Subject: [PATCH 11/24] Made combine_by_coords more readable. --- xarray/core/combine.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 4524094c0a2..8bc2c073699 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -617,7 +617,7 @@ def _combine_single_variable_hypercube( return concatenated def combine_by_coords( - datasets, + data_objects, compat="no_conflicts", data_vars="all", coords="different", @@ -626,8 +626,8 @@ def combine_by_coords( combine_attrs="no_conflicts", ): """ - Attempt to auto-magically combine the given datasets into one by using - dimension coordinates. + Attempt to auto-magically combine the given datasets (or data arrays) + into one by using dimension coordinates. This method attempts to combine a group of datasets along any number of dimensions into a single entity by inspecting coords and metadata and using @@ -651,8 +651,8 @@ def combine_by_coords( Parameters ---------- - datasets : sequence of xarray.Dataset - Dataset objects to combine. + data_objects : sequence of xarray.Dataset or sequence of xarray.DataArray + Data objects to combine. compat : {"identical", "equals", "broadcast_equals", "no_conflicts", "override"}, optional String indicating how to compare variables of the same name for potential conflicts: @@ -826,11 +826,15 @@ def combine_by_coords( temperature (y, x) float64 10.98 14.3 12.06 nan ... 18.89 10.44 8.293 precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176 """ - if not datasets: + if not data_objects: return Dataset() - if all(isinstance(data_object, DataArray) and data_object.name is None for data_object in datasets): - unnamed_arrays = datasets + all_unnamed_data_arrays = all( + isinstance(data_object, DataArray) and data_object.name is None + for data_object in data_objects + ) + if all_unnamed_data_arrays: + unnamed_arrays = data_objects temp_datasets = [data_array._to_temp_dataset() for data_array in unnamed_arrays] combined_temp_dataset = _combine_single_variable_hypercube( @@ -846,7 +850,7 @@ def combine_by_coords( else: # Group by data vars - sorted_datasets = sorted(datasets, key=vars_as_keys) + sorted_datasets = sorted(data_objects, key=vars_as_keys) grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) # Perform the multidimensional combine on each group of data variables From feb90ce6b524d70bacbc4ded083d9781e1d20619 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Thu, 15 Apr 2021 17:55:33 -0400 Subject: [PATCH 12/24] Cosmetic changes to code style. --- xarray/core/combine.py | 21 ++++++++-------- xarray/tests/test_combine.py | 46 ++++++++++++++++++++---------------- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 8bc2c073699..da6ecaf6cb5 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -557,6 +557,7 @@ def combine_nested( def vars_as_keys(ds): return tuple(sorted(ds)) + def _combine_single_variable_hypercube( datasets, fill_value=dtypes.NA, @@ -564,26 +565,25 @@ def _combine_single_variable_hypercube( coords="different", compat="no_conflicts", join="outer", - combine_attrs="no_conflicts" + combine_attrs="no_conflicts", ): """ Attempt to combine a list of Datasets into a hypercube using their coordinates. - + All provided Datasets must belong to a single variable, ie. must be assigned the same variable name. This precondition is not checked by this function, so the caller is assumed to know what it's doing. - + This function is NOT part of the public API. """ if len(datasets) == 0: raise ValueError( "At least one Dataset is required to resolve variable names " - "for combined hypercube.") + "for combined hypercube." + ) - combined_ids, concat_dims = _infer_concat_order_from_coords( - list(datasets) - ) + combined_ids, concat_dims = _infer_concat_order_from_coords(list(datasets)) if fill_value is None: # check that datasets form complete hypercube @@ -613,9 +613,10 @@ def _combine_single_variable_hypercube( "Resulting object does not have monotonic" " global indexes along dimension {}".format(dim) ) - + return concatenated + def combine_by_coords( data_objects, compat="no_conflicts", @@ -844,7 +845,7 @@ def combine_by_coords( coords=coords, compat=compat, join=join, - combine_attrs=combine_attrs + combine_attrs=combine_attrs, ) return DataArray()._from_temp_dataset(combined_temp_dataset) @@ -864,7 +865,7 @@ def combine_by_coords( coords=coords, compat=compat, join=join, - combine_attrs=combine_attrs + combine_attrs=combine_attrs, ) concatenated_grouped_by_data_vars.append(concatenated) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 7dbd79d1bad..6ebdadfbafb 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -628,29 +628,36 @@ def test_combine_nested_fill_value(self, fill_value): assert_identical(expected, actual) def test_combine_nested_unnamed_data_arrays(self): - unnamed_array = DataArray(data=[1.0, 2.0], coords={'x': [0, 1]}, dims='x') + unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") - actual = combine_nested([unnamed_array], concat_dim='x') + actual = combine_nested([unnamed_array], concat_dim="x") expected = unnamed_array assert_identical(expected, actual) - unnamed_array1 = DataArray(data=[1.0, 2.0], coords={'x': [0, 1]}, dims='x') - unnamed_array2 = DataArray(data=[3.0, 4.0], coords={'x': [2, 3]}, dims='x') + unnamed_array1 = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") + unnamed_array2 = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") - actual = combine_nested([unnamed_array1, unnamed_array2], concat_dim='x') - expected = DataArray(data=[1.0, 2.0, 3.0, 4.0], coords={'x': [0, 1, 2, 3]}, dims='x') + actual = combine_nested([unnamed_array1, unnamed_array2], concat_dim="x") + expected = DataArray( + data=[1.0, 2.0, 3.0, 4.0], coords={"x": [0, 1, 2, 3]}, dims="x" + ) assert_identical(expected, actual) - da1 = DataArray(data=[[0.0]], coords={'x':[0], 'y':[0]}, dims=['x','y']) - da2 = DataArray(data=[[1.0]], coords={'x':[0], 'y':[1]}, dims=['x','y']) - da3 = DataArray(data=[[2.0]], coords={'x':[1], 'y':[0]}, dims=['x','y']) - da4 = DataArray(data=[[3.0]], coords={'x':[1], 'y':[1]}, dims=['x','y']) + da1 = DataArray(data=[[0.0]], coords={"x": [0], "y": [0]}, dims=["x", "y"]) + da2 = DataArray(data=[[1.0]], coords={"x": [0], "y": [1]}, dims=["x", "y"]) + da3 = DataArray(data=[[2.0]], coords={"x": [1], "y": [0]}, dims=["x", "y"]) + da4 = DataArray(data=[[3.0]], coords={"x": [1], "y": [1]}, dims=["x", "y"]) objs = [[da1, da2], [da3, da4]] - expected = DataArray(data=[[0.0, 1.0], [2.0, 3.0]], coords={'x': [0, 1], 'y': [0, 1]}, dims=['x', 'y']) - actual = combine_nested(objs, concat_dim=['x', 'y']) + expected = DataArray( + data=[[0.0, 1.0], [2.0, 3.0]], + coords={"x": [0, 1], "y": [0, 1]}, + dims=["x", "y"], + ) + actual = combine_nested(objs, concat_dim=["x", "y"]) assert_identical(expected, actual) + class TestCombineAuto: def test_combine_by_coords(self): objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] @@ -854,23 +861,22 @@ def test_combine_by_coords_incomplete_hypercube(self): combine_by_coords([x1, x2, x3], fill_value=None) def test_combine_by_coords_unnamed_arrays(self): - unnamed_array = DataArray(data=[1.0, 2.0], coords={'x': [0, 1]}, dims='x') - + unnamed_array = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") + actual = combine_by_coords([unnamed_array]) expected = unnamed_array assert_identical(expected, actual) - - unnamed_array1 = DataArray(data=[1.0, 2.0], coords={'x': [0, 1]}, dims='x') - unnamed_array2 = DataArray(data=[3.0, 4.0], coords={'x': [2, 3]}, dims='x') + + unnamed_array1 = DataArray(data=[1.0, 2.0], coords={"x": [0, 1]}, dims="x") + unnamed_array2 = DataArray(data=[3.0, 4.0], coords={"x": [2, 3]}, dims="x") actual = combine_by_coords([unnamed_array1, unnamed_array2]) expected = DataArray( - data=[1.0, 2.0, 3.0, 4.0], - coords={'x': [0, 1, 2, 3]}, - dims='x' + data=[1.0, 2.0, 3.0, 4.0], coords={"x": [0, 1, 2, 3]}, dims="x" ) assert_identical(expected, actual) + @requires_cftime def test_combine_by_coords_distant_cftime_dates(): # Regression test for https://github.com/pydata/xarray/issues/3535 From 0044bb9bf8d35b4c91830cba318583f01d203c48 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Sun, 18 Apr 2021 10:26:12 -0400 Subject: [PATCH 13/24] Removed extra test from merge with previous PR. --- xarray/core/combine.py | 2 ++ xarray/tests/test_combine.py | 9 --------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 730b7dba159..2b470542a4e 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -557,6 +557,7 @@ def combine_nested( def vars_as_keys(ds): return tuple(sorted(ds)) + def _combine_single_variable_hypercube( datasets, fill_value=dtypes.NA, @@ -615,6 +616,7 @@ def _combine_single_variable_hypercube( return concatenated + def combine_by_coords( data_objects, compat="no_conflicts", diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index f7fc8ec625c..c304acf5dc6 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -698,15 +698,6 @@ def test_combine_by_coords(self): def test_empty_input(self): assert_identical(Dataset(), combine_by_coords([])) - def test_combine_coords_unnamed_arrays(self): - objs = [ - DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), - DataArray([2, 3], dims=("x"), coords=({"x": [2, 3]})), - ] - expected = Dataset({"_": ("x", [0, 1, 2, 3])}, coords={"x": [0, 1, 2, 3]}) - actual = combine_by_coords(objs) - assert_identical(expected, actual) - def test_combine_coords_mixed_datasets_arrays(self): objs = [ DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), From 5fe8323144c2ce49ffa66c08ddb70416df9b8517 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Tue, 4 May 2021 18:44:40 -0400 Subject: [PATCH 14/24] Updated test to use pytest.raises instead of raises_regex. --- xarray/tests/test_combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 827512f0c73..13461837b94 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -713,7 +713,7 @@ def test_combine_coords_mixed_datasets_arrays(self): DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), Dataset({"x": [2, 3]}), ] - with raises_regex(ValueError, "without providing an explicit name"): + with pytest.raises(ValueError, match="without providing an explicit name"): combine_by_coords(objs) def test_combine_coords_empty_list(self): From 805145c1dfb448ba3e3a488f5b30251f5a22bf53 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Tue, 11 May 2021 09:51:28 -0400 Subject: [PATCH 15/24] Added breaking-change entry to whats new page. --- doc/whats-new.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 091fb5a8df1..0a4f568106b 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,11 +21,15 @@ v0.18.1 (unreleased) New Features ~~~~~~~~~~~~ - +- :py:func:`combine_by_coords` now accepts a list of unnamed ``DataArray`` as input (:issue:`3248`, :pull:`4696`). + By `Augustus Ijams `_. Breaking changes ~~~~~~~~~~~~~~~~ - +- The main parameter to :py:func:`combine_by_coords` is renamed to `data_objects` instead + of `datasets` so anyone calling this method using a named parameter will need to update + the name accordingly (:issue:`3248`, :pull:`4696`). + By `Augustus Ijams `_. Deprecations ~~~~~~~~~~~~ @@ -403,8 +407,6 @@ Bug fixes `_. - :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` do not trigger computations anymore if :py:meth:`Dataset.weighted` or :py:meth:`DataArray.weighted` are applied (:issue:`4625`, :pull:`4668`). By `Julius Busecke `_. - :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`). -- :py:func:`combine_by_coords` now accepts a list of unnamed ``DataArray`` as input (:issue:`3248`, :pull:`4696`). - By `Augustus Ijams `_. - By default, when possible, xarray will now always use values of type ``int64`` when encoding and decoding ``numpy.datetime64[ns]`` datetimes. This ensures that maximum precision and accuracy are maintained in the round-tripping process From 05faa8878e8ec6172f8460c311e8b0ec67aa174a Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Tue, 11 May 2021 17:33:45 -0400 Subject: [PATCH 16/24] Added deprecation warning to combine_coords --- xarray/core/combine.py | 54 ++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index dff60c66198..b2ecc820cca 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -629,6 +629,7 @@ def combine_by_coords( fill_value=dtypes.NA, join="outer", combine_attrs="no_conflicts", + datasets=None ): """ Attempt to auto-magically combine the given datasets (or data arrays) @@ -831,6 +832,15 @@ def combine_by_coords( temperature (y, x) float64 10.98 14.3 12.06 nan ... 18.89 10.44 8.293 precipitation (y, x) float64 0.4376 0.8918 0.9637 ... 0.5684 0.01879 0.6176 """ + + # TODO remove after version 0.19, see PR4696 + if datasets is not None: + warnings.warn( + "The datasets argument has been renamed to `data_objects`." + " In future passing a value for datasets will raise an error.") + data_objects = datasets + + # TODO aijams - Check the logic of this patched combine_by_coords function. if not data_objects: return Dataset() @@ -853,19 +863,37 @@ def combine_by_coords( ) return DataArray()._from_temp_dataset(combined_temp_dataset) - # Check the overall coordinates are monotonically increasing - # TODO (benbovy - flexible indexes): only with pandas.Index? - for dim in concat_dims: - indexes = concatenated.xindexes.get(dim) - if not ( - indexes.array.is_monotonic_increasing - or indexes.array.is_monotonic_decreasing - ): - raise ValueError( - "Resulting object does not have monotonic" - " global indexes along dimension {}".format(dim) - ) - concatenated_grouped_by_data_vars.append(concatenated) + else: + # Group by data vars + sorted_datasets = sorted(data_objects, key=vars_as_keys) + grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) + + # Perform the multidimensional combine on each group of data variables + # before merging back together + concatenated_grouped_by_data_vars = [] + for vars, datasets_with_same_vars in grouped_by_vars: + concatenated = _combine_single_variable_hypercube( + list(datasets_with_same_vars), + fill_value=fill_value, + data_vars=data_vars, + coords=coords, + compat=compat, + join=join, + combine_attrs=combine_attrs, + ) + # Check the overall coordinates are monotonically increasing + # TODO (benbovy - flexible indexes): only with pandas.Index? + for dim in concat_dims: + indexes = concatenated.xindexes.get(dim) + if not ( + indexes.array.is_monotonic_increasing + or indexes.array.is_monotonic_decreasing + ): + raise ValueError( + "Resulting object does not have monotonic" + " global indexes along dimension {}".format(dim) + ) + concatenated_grouped_by_data_vars.append(concatenated) return merge( concatenated_grouped_by_data_vars, From 6c755256a7cdbf64582368d53ee2233dfa8ac556 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Tue, 11 May 2021 17:35:09 -0400 Subject: [PATCH 17/24] Removed index monotonicity checking temporarily. --- xarray/core/combine.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index b2ecc820cca..7790f399e03 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -881,18 +881,6 @@ def combine_by_coords( join=join, combine_attrs=combine_attrs, ) - # Check the overall coordinates are monotonically increasing - # TODO (benbovy - flexible indexes): only with pandas.Index? - for dim in concat_dims: - indexes = concatenated.xindexes.get(dim) - if not ( - indexes.array.is_monotonic_increasing - or indexes.array.is_monotonic_decreasing - ): - raise ValueError( - "Resulting object does not have monotonic" - " global indexes along dimension {}".format(dim) - ) concatenated_grouped_by_data_vars.append(concatenated) return merge( From 2c43030db34d01682e068b97b09751eae3a7c804 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Wed, 12 May 2021 07:59:24 -0400 Subject: [PATCH 18/24] Removed duplicate entries from whats new page. --- doc/whats-new.rst | 8 -------- 1 file changed, 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index a0596492231..e16c0cd802e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -408,14 +408,6 @@ Bug fixes calendar dates with time units of nanoseconds (:pull:`4400`). By `Spencer Clark `_ and `Mark Harfouche `_. -- :py:meth:`DataArray.resample` and :py:meth:`Dataset.resample` do not trigger computations anymore if :py:meth:`Dataset.weighted` or :py:meth:`DataArray.weighted` are applied (:issue:`4625`, :pull:`4668`). By `Julius Busecke `_. -- :py:func:`merge` with ``combine_attrs='override'`` makes a copy of the attrs (:issue:`4627`). -- By default, when possible, xarray will now always use values of type ``int64`` when encoding - and decoding ``numpy.datetime64[ns]`` datetimes. This ensures that maximum - precision and accuracy are maintained in the round-tripping process - (:issue:`4045`, :pull:`4684`). It also enables encoding and decoding standard calendar - dates with time units of nanoseconds (:pull:`4400`). By `Spencer Clark - `_ and `Mark Harfouche `_. - :py:meth:`DataArray.astype`, :py:meth:`Dataset.astype` and :py:meth:`Variable.astype` support the ``order`` and ``subok`` parameters again. This fixes a regression introduced in version 0.16.1 (:issue:`4644`, :pull:`4683`). From f6fae25289a3b08382b229d5b785b122b64da664 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Wed, 12 May 2021 08:01:26 -0400 Subject: [PATCH 19/24] Removed TODO message --- xarray/core/combine.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 7790f399e03..a3e8e2273a2 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -840,7 +840,6 @@ def combine_by_coords( " In future passing a value for datasets will raise an error.") data_objects = datasets - # TODO aijams - Check the logic of this patched combine_by_coords function. if not data_objects: return Dataset() From 81ec1ffe7796c853cb50682636e596b21ce8f3f5 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Sun, 16 May 2021 10:53:25 -0400 Subject: [PATCH 20/24] Added test for combine_nested. --- xarray/tests/test_combine.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index c1c379163db..1380c15b069 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -673,6 +673,14 @@ def test_combine_nested_unnamed_data_arrays(self): actual = combine_nested(objs, concat_dim=["x", "y"]) assert_identical(expected, actual) + # TODO aijams - Determine if this test is appropriate. + def test_nested_combine_mixed_datasets_arrays(self): + objs = [ + DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), + Dataset({"x": [2, 3]}), + ] + with pytest.raises(ValueError, match="without providing an explicit name"): + combine_nested(objs, "x") class TestCombineAuto: def test_combine_by_coords(self): From 637d4cc766c2ee87d08598609a96bff7e00ae8dd Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Sun, 16 May 2021 18:45:13 -0400 Subject: [PATCH 21/24] Added check to combine methods to clarify parameter requirements. --- xarray/core/combine.py | 21 ++++++++++++++++++--- xarray/core/utils.py | 7 +++++++ xarray/tests/test_combine.py | 10 ++++++++-- xarray/tests/test_utils.py | 16 +++++++++++++++- 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 61be4b311ba..9debaadb1ae 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -8,7 +8,7 @@ from .dataarray import DataArray from .dataset import Dataset from .merge import merge - +from .utils import iterate_nested def _infer_concat_order_from_positions(datasets): return dict(_infer_tile_ids_from_nested_list(datasets, ())) @@ -310,7 +310,6 @@ def _new_tile_id(single_id_ds_pair): tile_id, ds = single_id_ds_pair return tile_id[1:] - def _nested_combine( datasets, concat_dims, @@ -352,7 +351,6 @@ def _nested_combine( ) return combined - def combine_nested( datasets, concat_dim, @@ -540,6 +538,14 @@ def combine_nested( concat merge """ + mixed_datasets_and_arrays = any( + isinstance(obj, Dataset) for obj in iterate_nested(datasets) + ) and any( + isinstance(obj, DataArray) and obj.name is None for obj in iterate_nested(datasets) + ) + if mixed_datasets_and_arrays: + raise ValueError("Can't combine datasets with unnamed arrays.") + if isinstance(concat_dim, (str, DataArray)) or concat_dim is None: concat_dim = [concat_dim] @@ -840,6 +846,15 @@ def combine_by_coords( if not data_objects: return Dataset() + mixed_arrays_and_datasets = any( + isinstance(data_object, DataArray) and data_object.name is None + for data_object in data_objects + ) and any( + isinstance(data_object, Dataset) for data_object in data_objects + ) + if mixed_arrays_and_datasets: + raise ValueError("Can't automatically combine datasets with unnamed arrays.") + all_unnamed_data_arrays = all( isinstance(data_object, DataArray) and data_object.name is None for data_object in data_objects diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 62b66278b24..74ac33536c1 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -933,3 +933,10 @@ class Default(Enum): _default = Default.token + +def iterate_nested(nested_list): + for item in nested_list: + if isinstance(item, list): + yield from iterate_nested(item) + else: + yield item diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 1380c15b069..609882cac97 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -679,7 +679,10 @@ def test_nested_combine_mixed_datasets_arrays(self): DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), Dataset({"x": [2, 3]}), ] - with pytest.raises(ValueError, match="without providing an explicit name"): + with pytest.raises( + ValueError, + match=r"Can't combine datasets with unnamed arrays." + ): combine_nested(objs, "x") class TestCombineAuto: @@ -729,7 +732,10 @@ def test_combine_coords_mixed_datasets_arrays(self): DataArray([0, 1], dims=("x"), coords=({"x": [0, 1]})), Dataset({"x": [2, 3]}), ] - with pytest.raises(ValueError, match="without providing an explicit name"): + with pytest.raises( + ValueError, + match=r"Can't automatically combine datasets with unnamed arrays." + ): combine_by_coords(objs) @pytest.mark.parametrize( diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 3044dbaae50..13461a975ce 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -7,7 +7,7 @@ from xarray.coding.cftimeindex import CFTimeIndex from xarray.core import duck_array_ops, utils -from xarray.core.utils import either_dict_or_kwargs +from xarray.core.utils import either_dict_or_kwargs, iterate_nested from . import assert_array_equal, requires_cftime, requires_dask from .test_coding_times import _all_cftime_date_types @@ -321,3 +321,17 @@ def test_infix_dims(supplied, all_, expected): def test_infix_dims_errors(supplied, all_): with pytest.raises(ValueError): list(utils.infix_dims(supplied, all_)) + +@pytest.mark.parametrize( + "nested_list, expected", + [ + ([], []), + ([1], [1]), + ([1, 2, 3], [1, 2, 3]), + ([[1]], [1]), + ([[1, 2], [3, 4]], [1, 2, 3, 4]), + ([[[1, 2, 3], [4]], [5, 6]], [1, 2, 3, 4, 5, 6]) + ] +) +def test_iterate_nested(nested_list, expected): + assert list(iterate_nested(nested_list)) == expected \ No newline at end of file From b5940a1b5af2742102ca145eb27957ab606e450d Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Wed, 19 May 2021 07:18:45 -0400 Subject: [PATCH 22/24] Reassigned description of changes to bug fixes category. --- doc/whats-new.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 820be389609..ead19c4662e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,9 +21,6 @@ v0.18.1 (unreleased) New Features ~~~~~~~~~~~~ -- :py:func:`combine_by_coords` now accepts a list of unnamed ``DataArray`` as input (:issue:`3248`, :pull:`4696`). - By `Augustus Ijams `_. - - Implement :py:meth:`DataArray.drop_duplicates` to remove duplicate dimension values (:pull:`5239`). By `Andrew Huang `_. @@ -49,7 +46,9 @@ Deprecations Bug fixes ~~~~~~~~~ - +- :py:func:`combine_by_coords` can now handle combining a list of unnamed + ``DataArray`` as input (:issue:`3248`, :pull:`4696`). + By `Augustus Ijams `_. - Opening netCDF files from a path that doesn't end in ``.nc`` without supplying an explicit ``engine`` works again (:issue:`5295`), fixing a bug introduced in 0.18.0. From 04cd5f8495683a127054438e26647af05d0bfd62 Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Wed, 19 May 2021 10:35:25 -0400 Subject: [PATCH 23/24] Minor style changes. --- xarray/core/combine.py | 20 ++++++++++++-------- xarray/core/utils.py | 1 + xarray/tests/test_combine.py | 12 ++++++------ xarray/tests/test_utils.py | 7 ++++--- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 9debaadb1ae..510d0831693 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,4 +1,5 @@ import itertools +import warnings from collections import Counter import pandas as pd @@ -10,6 +11,7 @@ from .merge import merge from .utils import iterate_nested + def _infer_concat_order_from_positions(datasets): return dict(_infer_tile_ids_from_nested_list(datasets, ())) @@ -310,6 +312,7 @@ def _new_tile_id(single_id_ds_pair): tile_id, ds = single_id_ds_pair return tile_id[1:] + def _nested_combine( datasets, concat_dims, @@ -351,6 +354,7 @@ def _nested_combine( ) return combined + def combine_nested( datasets, concat_dim, @@ -541,7 +545,8 @@ def combine_nested( mixed_datasets_and_arrays = any( isinstance(obj, Dataset) for obj in iterate_nested(datasets) ) and any( - isinstance(obj, DataArray) and obj.name is None for obj in iterate_nested(datasets) + isinstance(obj, DataArray) and obj.name is None + for obj in iterate_nested(datasets) ) if mixed_datasets_and_arrays: raise ValueError("Can't combine datasets with unnamed arrays.") @@ -625,16 +630,16 @@ def _combine_single_variable_hypercube( return concatenated - +# TODO remove empty list default param after version 0.19, see PR4696 def combine_by_coords( - data_objects, + data_objects=[], compat="no_conflicts", data_vars="all", coords="different", fill_value=dtypes.NA, join="outer", combine_attrs="no_conflicts", - datasets=None + datasets=None, ): """ Attempt to auto-magically combine the given datasets (or data arrays) @@ -840,7 +845,8 @@ def combine_by_coords( if datasets is not None: warnings.warn( "The datasets argument has been renamed to `data_objects`." - " In future passing a value for datasets will raise an error.") + " In future passing a value for datasets will raise an error." + ) data_objects = datasets if not data_objects: @@ -849,9 +855,7 @@ def combine_by_coords( mixed_arrays_and_datasets = any( isinstance(data_object, DataArray) and data_object.name is None for data_object in data_objects - ) and any( - isinstance(data_object, Dataset) for data_object in data_objects - ) + ) and any(isinstance(data_object, Dataset) for data_object in data_objects) if mixed_arrays_and_datasets: raise ValueError("Can't automatically combine datasets with unnamed arrays.") diff --git a/xarray/core/utils.py b/xarray/core/utils.py index f28bb2cee58..86c56915d58 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -933,6 +933,7 @@ class Default(Enum): _default = Default.token + def iterate_nested(nested_list): for item in nested_list: if isinstance(item, list): diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index 609882cac97..95d7f495f50 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -680,11 +680,11 @@ def test_nested_combine_mixed_datasets_arrays(self): Dataset({"x": [2, 3]}), ] with pytest.raises( - ValueError, - match=r"Can't combine datasets with unnamed arrays." - ): + ValueError, match=r"Can't combine datasets with unnamed arrays." + ): combine_nested(objs, "x") + class TestCombineAuto: def test_combine_by_coords(self): objs = [Dataset({"x": [0]}), Dataset({"x": [1]})] @@ -733,9 +733,9 @@ def test_combine_coords_mixed_datasets_arrays(self): Dataset({"x": [2, 3]}), ] with pytest.raises( - ValueError, - match=r"Can't automatically combine datasets with unnamed arrays." - ): + ValueError, + match=r"Can't automatically combine datasets with unnamed arrays.", + ): combine_by_coords(objs) @pytest.mark.parametrize( diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 13461a975ce..1d3136b0579 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -322,6 +322,7 @@ def test_infix_dims_errors(supplied, all_): with pytest.raises(ValueError): list(utils.infix_dims(supplied, all_)) + @pytest.mark.parametrize( "nested_list, expected", [ @@ -330,8 +331,8 @@ def test_infix_dims_errors(supplied, all_): ([1, 2, 3], [1, 2, 3]), ([[1]], [1]), ([[1, 2], [3, 4]], [1, 2, 3, 4]), - ([[[1, 2, 3], [4]], [5, 6]], [1, 2, 3, 4, 5, 6]) - ] + ([[[1, 2, 3], [4]], [5, 6]], [1, 2, 3, 4, 5, 6]), + ], ) def test_iterate_nested(nested_list, expected): - assert list(iterate_nested(nested_list)) == expected \ No newline at end of file + assert list(iterate_nested(nested_list)) == expected From e58a9e202c9909c4143e47d5ccea8c0a23498e2d Mon Sep 17 00:00:00 2001 From: Augustus Ijams Date: Wed, 19 May 2021 10:54:40 -0400 Subject: [PATCH 24/24] Added blank line for style purposes. --- xarray/core/combine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 510d0831693..752c9b8aedd 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -630,6 +630,7 @@ def _combine_single_variable_hypercube( return concatenated + # TODO remove empty list default param after version 0.19, see PR4696 def combine_by_coords( data_objects=[],