From 1371507ff41d016fc34c20299eb83659c83b72d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 5 Jun 2024 12:49:05 +0200 Subject: [PATCH 01/13] FIX: do not sort datasets in combine_by_coords --- xarray/core/combine.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 5cb0a3417fa..004f6b0c067 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -950,8 +950,7 @@ def combine_by_coords( ] # Group by data vars - sorted_datasets = sorted(data_objects, key=vars_as_keys) - grouped_by_vars = itertools.groupby(sorted_datasets, key=vars_as_keys) + grouped_by_vars = itertools.groupby(data_objects, key=vars_as_keys) # Perform the multidimensional combine on each group of data variables # before merging back together From 94d315e2ab551b366e21cdb50bbb7ed8e7b2efb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 5 Jun 2024 13:22:56 +0200 Subject: [PATCH 02/13] add test --- xarray/tests/test_combine.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index ea1659e4539..92045a5aaec 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -1034,6 +1034,20 @@ def test_combine_by_coords_incomplete_hypercube(self): with pytest.raises(ValueError): combine_by_coords([x1, x2, x3], fill_value=None) + def test_combine_by_coords_override_order(self): + # regression test for https://github.com/pydata/xarray/issues/8828 + x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) + x2 = Dataset( + {"a": (("y", "x"), [[2]]), "b": (("y", "x"), [[1]])}, + coords={"y": [0], "x": [0]}, + ) + actual = combine_by_coords([x1, x2], compat="override") + assert_equal(actual["a"], actual["b"]) + assert_equal(actual["a"], x1["a"]) + + actual = combine_by_coords([x2, x1], compat="override") + assert_equal(actual["a"], x2["a"]) + class TestCombineMixedObjectsbyCoords: def test_combine_by_coords_mixed_unnamed_dataarrays(self): From 9b385666994ab0e002fc68b90568af433f3e9be4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Wed, 5 Jun 2024 15:45:24 +0200 Subject: [PATCH 03/13] add whats-new.rst entry --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 6a97ceaff00..399c7d5de23 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,9 @@ Deprecations Bug fixes ~~~~~~~~~ +- Preserve order of variables in :py:func:`xarray.combine_by_coords` (:issue:`8828`, :pull:`9070`). + By `Kai Mühlbauer `_. + Documentation ~~~~~~~~~~~~~ From 8202420cc9d66bb4894367e11ed536a63ec3c1c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Thu, 6 Jun 2024 13:49:02 +0200 Subject: [PATCH 04/13] use groupby_defaultdict --- xarray/core/combine.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 004f6b0c067..db05787eac4 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,7 +1,7 @@ from __future__ import annotations import itertools -from collections import Counter +from collections import Counter, defaultdict from collections.abc import Iterable, Sequence from typing import TYPE_CHECKING, Literal, Union @@ -591,6 +591,15 @@ def vars_as_keys(ds): return tuple(sorted(ds)) +def groupby_defaultdict(iter, key=lambda x: x): + """replacement for itertools.groupby""" + idx = defaultdict(list) + for i, obj in enumerate(iter): + idx[key(obj)].append(i) + for k, ix in idx.items(): + yield k, (iter[i] for i in ix) + + def _combine_single_variable_hypercube( datasets, fill_value=dtypes.NA, @@ -950,7 +959,7 @@ def combine_by_coords( ] # Group by data vars - grouped_by_vars = itertools.groupby(data_objects, key=vars_as_keys) + grouped_by_vars = groupby_defaultdict(data_objects, key=vars_as_keys) # Perform the multidimensional combine on each group of data variables # before merging back together From 8d4e5f882dc4a87db0fa1514969a6c5e065faaac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 10 Nov 2024 18:26:48 +0100 Subject: [PATCH 05/13] Apply suggestions from code review Co-authored-by: Michael Niklas --- xarray/core/combine.py | 5 ++++- xarray/tests/test_combine.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index b96b356e230..80165e22be9 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -606,7 +606,10 @@ def vars_as_keys(ds): return tuple(sorted(ds)) -def groupby_defaultdict(iter, key=lambda x: x): +T = TypeVar("T") +K = TypeVar("K", bound=Hashable) + +def groupby_defaultdict(iter: list[T], key: Callable[[T], K] = lambda x: x) -> Iterator[tuple[K, Iterator[T]]]: """replacement for itertools.groupby""" idx = defaultdict(list) for i, obj in enumerate(iter): diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index bc287e1d0a4..cc20ab414ee 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -1043,7 +1043,7 @@ def test_combine_by_coords_incomplete_hypercube(self): with pytest.raises(ValueError): combine_by_coords([x1, x2, x3], fill_value=None) - def test_combine_by_coords_override_order(self): + def test_combine_by_coords_override_order(self) -> None: # regression test for https://github.com/pydata/xarray/issues/8828 x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]}) x2 = Dataset( From 089e7547f33972b987ece4e6636af32f8dd7ed3b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Nov 2024 17:27:00 +0000 Subject: [PATCH 06/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/combine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 80165e22be9..b0c1d6b2efe 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -609,7 +609,10 @@ def vars_as_keys(ds): T = TypeVar("T") K = TypeVar("K", bound=Hashable) -def groupby_defaultdict(iter: list[T], key: Callable[[T], K] = lambda x: x) -> Iterator[tuple[K, Iterator[T]]]: + +def groupby_defaultdict( + iter: list[T], key: Callable[[T], K] = lambda x: x +) -> Iterator[tuple[K, Iterator[T]]]: """replacement for itertools.groupby""" idx = defaultdict(list) for i, obj in enumerate(iter): From 1e772d03466c095148fc769f4c5236a967ee259d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 10 Nov 2024 18:30:32 +0100 Subject: [PATCH 07/13] fix typing --- xarray/core/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index b0c1d6b2efe..82a01aadb76 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -3,7 +3,7 @@ import itertools from collections import Counter, defaultdict from collections.abc import Iterable, Iterator, Sequence -from typing import TYPE_CHECKING, Literal, TypeVar, Union, cast +from typing import TYPE_CHECKING, Callable, Iterable, Literal, TypeVar, Union, cast import pandas as pd From 12bf8c838f945306a702913460777dfd2fb262a8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 10 Nov 2024 17:30:45 +0000 Subject: [PATCH 08/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/combine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 82a01aadb76..697536d6d55 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -2,8 +2,8 @@ import itertools from collections import Counter, defaultdict -from collections.abc import Iterable, Iterator, Sequence -from typing import TYPE_CHECKING, Callable, Iterable, Literal, TypeVar, Union, cast +from collections.abc import Callable, Iterable, Iterator, Sequence +from typing import TYPE_CHECKING, Literal, TypeVar, Union, cast import pandas as pd From cf3e94f71eb8cdf72b535d7dfd22f0d52323faea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kai=20M=C3=BChlbauer?= Date: Sun, 10 Nov 2024 18:32:29 +0100 Subject: [PATCH 09/13] Update xarray/core/combine.py --- xarray/core/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 697536d6d55..aec781c3513 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -2,7 +2,7 @@ import itertools from collections import Counter, defaultdict -from collections.abc import Callable, Iterable, Iterator, Sequence +from collections.abc import Callable, Hashable, Iterable, Iterator, Sequence from typing import TYPE_CHECKING, Literal, TypeVar, Union, cast import pandas as pd From 08541d374afc050424516d53ebad000c7ff81d86 Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Mon, 27 Jan 2025 15:39:45 +0100 Subject: [PATCH 10/13] fix typing, replace other occurrence --- xarray/core/combine.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index aec781c3513..6e435191b0d 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -269,10 +269,7 @@ def _combine_all_along_first_dim( combine_attrs: CombineAttrsOptions = "drop", ): # Group into lines of datasets which must be combined along dim - # need to sort by _new_tile_id first for groupby to work - # TODO: is the sorted need? - combined_ids = dict(sorted(combined_ids.items(), key=_new_tile_id)) - grouped = itertools.groupby(combined_ids.items(), key=_new_tile_id) + grouped = groupby_defaultdict(combined_ids.items(), key=_new_tile_id) # Combine all of these datasets along dim new_combined_ids = {} @@ -606,12 +603,11 @@ def vars_as_keys(ds): return tuple(sorted(ds)) -T = TypeVar("T") K = TypeVar("K", bound=Hashable) def groupby_defaultdict( - iter: list[T], key: Callable[[T], K] = lambda x: x + iter: list[T], key: Callable[[T], K], ) -> Iterator[tuple[K, Iterator[T]]]: """replacement for itertools.groupby""" idx = defaultdict(list) From 7a9ac3651f86fd2d980066fe6bee1c2b7bdaa936 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 14:40:16 +0000 Subject: [PATCH 11/13] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/combine.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 6e435191b0d..0c7add1c7f6 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,6 +1,5 @@ from __future__ import annotations -import itertools from collections import Counter, defaultdict from collections.abc import Callable, Hashable, Iterable, Iterator, Sequence from typing import TYPE_CHECKING, Literal, TypeVar, Union, cast @@ -607,7 +606,8 @@ def vars_as_keys(ds): def groupby_defaultdict( - iter: list[T], key: Callable[[T], K], + iter: list[T], + key: Callable[[T], K], ) -> Iterator[tuple[K, Iterator[T]]]: """replacement for itertools.groupby""" idx = defaultdict(list) From a506c04a6c5c72c2f5d05fb169c0558243742e2b Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Mon, 27 Jan 2025 15:54:53 +0100 Subject: [PATCH 12/13] fix groupby --- xarray/core/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 0c7add1c7f6..b8a5a64c9f2 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -268,7 +268,7 @@ def _combine_all_along_first_dim( combine_attrs: CombineAttrsOptions = "drop", ): # Group into lines of datasets which must be combined along dim - grouped = groupby_defaultdict(combined_ids.items(), key=_new_tile_id) + grouped = groupby_defaultdict(combined_ids, key=_new_tile_id) # Combine all of these datasets along dim new_combined_ids = {} From 3e3bfadae71eb6cd5ce7c6516850a28ca54386b0 Mon Sep 17 00:00:00 2001 From: Kai Muehlbauer Date: Mon, 27 Jan 2025 16:11:29 +0100 Subject: [PATCH 13/13] fix groupby --- xarray/core/combine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index b8a5a64c9f2..f02d046fff6 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -268,7 +268,7 @@ def _combine_all_along_first_dim( combine_attrs: CombineAttrsOptions = "drop", ): # Group into lines of datasets which must be combined along dim - grouped = groupby_defaultdict(combined_ids, key=_new_tile_id) + grouped = groupby_defaultdict(list(combined_ids.items()), key=_new_tile_id) # Combine all of these datasets along dim new_combined_ids = {}