From c3b47db42c40dfe7620c6d3ebcbde5565f9052f8 Mon Sep 17 00:00:00 2001 From: jbusecke Date: Sat, 31 Oct 2020 15:09:57 -0400 Subject: [PATCH 01/20] Use map_blocks for weighted init checks --- xarray/core/weighted.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 96b4c79f245..6947d2022f8 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -99,12 +99,17 @@ def __init__(self, obj, weights): if not isinstance(weights, DataArray): raise ValueError("`weights` must be a DataArray") - - if weights.isnull().any(): - raise ValueError( - "`weights` cannot contain missing values. " - "Missing values can be replaced by `weights.fillna(0)`." - ) + + def _weight_check(w): + if w.isnull().any(): + raise ValueError( + "`weights` cannot contain missing values. " + "Missing values can be replaced by `weights.fillna(0)`." + ) + else: + return w + + weights = weights.map_blocks(_weight_check, template=weights) self.obj = obj self.weights = weights From 36f83bcf9c8b733d3f50a43ffd7ebfed294efea7 Mon Sep 17 00:00:00 2001 From: jbusecke Date: Sat, 31 Oct 2020 15:36:34 -0400 Subject: [PATCH 02/20] added dask test --- xarray/core/weighted.py | 5 ++--- xarray/tests/test_weighted.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 6947d2022f8..d5d7b23dfc8 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -99,7 +99,7 @@ def __init__(self, obj, weights): if not isinstance(weights, DataArray): raise ValueError("`weights` must be a DataArray") - + def _weight_check(w): if w.isnull().any(): raise ValueError( @@ -108,9 +108,8 @@ def _weight_check(w): ) else: return w - - weights = weights.map_blocks(_weight_check, template=weights) + weights = weights.map_blocks(_weight_check, template=weights) self.obj = obj self.weights = weights diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 48fad296664..f00f596e81a 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -29,6 +29,22 @@ def test_weighted_weights_nan_raises(as_dataset, weights): data.weighted(DataArray(weights)) +@pytest.mark.parametrize("as_dataset", (True, False)) +@pytest.mark.parametrize("weights", ([np.nan, 2], [np.nan, np.nan])) +def test_weighted_weights_nan_raises_dask(as_dataset, weights): + + data = DataArray([1, 2]).chunk({"dim_0": -1}) + if as_dataset: + data = data.to_dataset(name="data") + + weights = DataArray(weights).chunk({"dim_0": -1}) + + weighted = data.weighted(weights) + + with pytest.raises(ValueError, match="`weights` cannot contain missing values."): + weighted.sum().load() + + @pytest.mark.parametrize( ("weights", "expected"), (([1, 2], 3), ([2, 0], 2), ([0, 0], np.nan), ([-1, 1], np.nan)), From 5cabe4ad7591fc33fca16526332142d164c3e820 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Sun, 1 Nov 2020 16:43:00 -0500 Subject: [PATCH 03/20] Update xarray/core/weighted.py Co-authored-by: Deepak Cherian --- xarray/core/weighted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index d5d7b23dfc8..de7d5aab26b 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -109,7 +109,7 @@ def _weight_check(w): else: return w - weights = weights.map_blocks(_weight_check, template=weights) + weights = dask.array.map_blocks(_weight_check, weights.data) self.obj = obj self.weights = weights From dc129bbe1bb253d79bdda381cf1436328922c993 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Sun, 1 Nov 2020 17:21:21 -0500 Subject: [PATCH 04/20] Update xarray/core/weighted.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/weighted.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index de7d5aab26b..aec1ef7e20d 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -106,8 +106,7 @@ def _weight_check(w): "`weights` cannot contain missing values. " "Missing values can be replaced by `weights.fillna(0)`." ) - else: - return w + return w weights = dask.array.map_blocks(_weight_check, weights.data) self.obj = obj From a3b51e08942ce7395f770bd09aad74c60f9a7678 Mon Sep 17 00:00:00 2001 From: jbusecke Date: Sun, 1 Nov 2020 17:36:55 -0500 Subject: [PATCH 05/20] implement requires_dask --- xarray/core/weighted.py | 4 +++- xarray/tests/test_weighted.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index aec1ef7e20d..71216e2f609 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -96,6 +96,7 @@ def __init__(self, obj, weights): """ from .dataarray import DataArray + import dask.array as dsa if not isinstance(weights, DataArray): raise ValueError("`weights` must be a DataArray") @@ -108,7 +109,8 @@ def _weight_check(w): ) return w - weights = dask.array.map_blocks(_weight_check, weights.data) + weights = weights.map_blocks(_weight_check, template=weights) + # weights = dsa.map_blocks(_weight_check, weights.data, dtype=weights.dtype) self.obj = obj self.weights = weights diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index f00f596e81a..d2c68d7ab8b 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -5,6 +5,8 @@ from xarray import DataArray from xarray.tests import assert_allclose, assert_equal, raises_regex +from . import requires_dask + @pytest.mark.parametrize("as_dataset", (True, False)) def test_weighted_non_DataArray_weights(as_dataset): @@ -29,6 +31,7 @@ def test_weighted_weights_nan_raises(as_dataset, weights): data.weighted(DataArray(weights)) +@requires_dask @pytest.mark.parametrize("as_dataset", (True, False)) @pytest.mark.parametrize("weights", ([np.nan, 2], [np.nan, np.nan])) def test_weighted_weights_nan_raises_dask(as_dataset, weights): From 2f0cc53cd41e9b4e54026d5851536df08b92c1e4 Mon Sep 17 00:00:00 2001 From: jbusecke Date: Sun, 1 Nov 2020 17:42:10 -0500 Subject: [PATCH 06/20] Implement raise_if_dask_computes --- xarray/tests/test_weighted.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index d2c68d7ab8b..25252e43b05 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -5,7 +5,7 @@ from xarray import DataArray from xarray.tests import assert_allclose, assert_equal, raises_regex -from . import requires_dask +from . import requires_dask, raise_if_dask_computes @pytest.mark.parametrize("as_dataset", (True, False)) @@ -42,7 +42,8 @@ def test_weighted_weights_nan_raises_dask(as_dataset, weights): weights = DataArray(weights).chunk({"dim_0": -1}) - weighted = data.weighted(weights) + with raise_if_dask_computes(): + weighted = data.weighted(weights) with pytest.raises(ValueError, match="`weights` cannot contain missing values."): weighted.sum().load() From feb0f1c79dbbf55db0466c68f75d4ea06d3d74ba Mon Sep 17 00:00:00 2001 From: jbusecke Date: Sun, 1 Nov 2020 18:09:37 -0500 Subject: [PATCH 07/20] Added logic to check for dask arrays --- xarray/core/weighted.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 71216e2f609..0e458eedb4e 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,6 +1,8 @@ from typing import TYPE_CHECKING, Hashable, Iterable, Optional, Union, overload +import numpy as np from .computation import dot +from .pycompat import is_duck_dask_array from .options import _get_keep_attrs if TYPE_CHECKING: @@ -96,21 +98,27 @@ def __init__(self, obj, weights): """ from .dataarray import DataArray - import dask.array as dsa if not isinstance(weights, DataArray): raise ValueError("`weights` must be a DataArray") def _weight_check(w): - if w.isnull().any(): + if np.isnan(w).any(): raise ValueError( "`weights` cannot contain missing values. " "Missing values can be replaced by `weights.fillna(0)`." ) - return w + return w.data + + if is_duck_dask_array(weights.data): + import dask.array as dsa + + weights.data = dsa.map_blocks( + _weight_check, weights.data, dtype=weights.dtype + ) + else: + weights.data = _weight_check(weights.data) - weights = weights.map_blocks(_weight_check, template=weights) - # weights = dsa.map_blocks(_weight_check, weights.data, dtype=weights.dtype) self.obj = obj self.weights = weights From 4da71202f25f6cd36095f04bb4895037960dcce1 Mon Sep 17 00:00:00 2001 From: jbusecke Date: Sun, 1 Nov 2020 18:19:25 -0500 Subject: [PATCH 08/20] applied isort --- xarray/core/weighted.py | 3 ++- xarray/tests/test_weighted.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 0e458eedb4e..76a76a8cb6d 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,9 +1,10 @@ from typing import TYPE_CHECKING, Hashable, Iterable, Optional, Union, overload import numpy as np + from .computation import dot -from .pycompat import is_duck_dask_array from .options import _get_keep_attrs +from .pycompat import is_duck_dask_array if TYPE_CHECKING: from .dataarray import DataArray, Dataset diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index 25252e43b05..2366b982cec 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -5,7 +5,7 @@ from xarray import DataArray from xarray.tests import assert_allclose, assert_equal, raises_regex -from . import requires_dask, raise_if_dask_computes +from . import raise_if_dask_computes, requires_dask @pytest.mark.parametrize("as_dataset", (True, False)) From 8886baa514e95f47dfc7cb45302df1470649d706 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Wed, 4 Nov 2020 09:37:18 -0500 Subject: [PATCH 09/20] Update xarray/core/weighted.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/weighted.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 76a76a8cb6d..4d245c366d1 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -104,6 +104,7 @@ def __init__(self, obj, weights): raise ValueError("`weights` must be a DataArray") def _weight_check(w): + # Ref https://github.com/pydata/xarray/pull/4559/files#r515968670 if np.isnan(w).any(): raise ValueError( "`weights` cannot contain missing values. " From dc92dbeda2ae9478afc36135f475b8ad4dcc5799 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Wed, 4 Nov 2020 09:41:04 -0500 Subject: [PATCH 10/20] Refactor dask mapping --- xarray/core/weighted.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 4d245c366d1..41420f6a717 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -113,11 +113,8 @@ def _weight_check(w): return w.data if is_duck_dask_array(weights.data): - import dask.array as dsa - - weights.data = dsa.map_blocks( - _weight_check, weights.data, dtype=weights.dtype - ) + weights = weights.copy(data=weights.data.map_blocks(_weight_check, dtype=weights.dtype)) + else: weights.data = _weight_check(weights.data) From 681f34dd7788040b0f12daa69ee0d75dae50e178 Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Wed, 4 Nov 2020 10:41:36 -0500 Subject: [PATCH 11/20] Try duck_array_ops.isnull --- xarray/core/weighted.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 41420f6a717..aabff9650c2 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -2,6 +2,7 @@ import numpy as np +from . import duck_array_ops from .computation import dot from .options import _get_keep_attrs from .pycompat import is_duck_dask_array @@ -105,12 +106,12 @@ def __init__(self, obj, weights): def _weight_check(w): # Ref https://github.com/pydata/xarray/pull/4559/files#r515968670 - if np.isnan(w).any(): + if duck_array_ops.isnull(w).any(): raise ValueError( "`weights` cannot contain missing values. " "Missing values can be replaced by `weights.fillna(0)`." ) - return w.data + return w if is_duck_dask_array(weights.data): weights = weights.copy(data=weights.data.map_blocks(_weight_check, dtype=weights.dtype)) From 085256495662a74d284ccb0b4b89fc9d8badcc3a Mon Sep 17 00:00:00 2001 From: jbusecke Date: Wed, 4 Nov 2020 11:30:04 -0500 Subject: [PATCH 12/20] black formatting --- xarray/core/weighted.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index aabff9650c2..f2610bbe41a 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -114,8 +114,10 @@ def _weight_check(w): return w if is_duck_dask_array(weights.data): - weights = weights.copy(data=weights.data.map_blocks(_weight_check, dtype=weights.dtype)) - + weights = weights.copy( + data=weights.data.map_blocks(_weight_check, dtype=weights.dtype) + ) + else: weights.data = _weight_check(weights.data) From 64bef42524a47d48711bab99290f8fa54d21cea2 Mon Sep 17 00:00:00 2001 From: jbusecke Date: Wed, 4 Nov 2020 11:41:37 -0500 Subject: [PATCH 13/20] Add whatsnew --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 0ad0f920102..7e2f11b9cbc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -32,6 +32,8 @@ New Features By `Miguel Jimenez `_ and `Wei Ji Leong `_. - Unary & binary operations follow the ``keep_attrs`` flag (:issue:`3490`, :issue:`4065`, :issue:`3433`, :issue:`3595`, :pull:`4195`). By `Deepak Cherian `_. +- :py:meth:`Dataset.weighted` and :py:meth:`DataArray.weighted` are now executing value checks lazily if weights are provided as dask arrays (:issue:`4541`, :pull:`4559`). + By `Julius Busecke `_. Bug fixes ~~~~~~~~~ From c81a829bb12e9bb7a874baa3b69fd999d4ac756c Mon Sep 17 00:00:00 2001 From: Julius Busecke Date: Wed, 4 Nov 2020 12:37:54 -0500 Subject: [PATCH 14/20] Remove numpy --- xarray/core/weighted.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index f2610bbe41a..38339b2e30c 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,6 +1,5 @@ from typing import TYPE_CHECKING, Hashable, Iterable, Optional, Union, overload -import numpy as np from . import duck_array_ops from .computation import dot From 6a85409f68d7926eae668c6acffcfa5fcf89f034 Mon Sep 17 00:00:00 2001 From: jbusecke Date: Thu, 5 Nov 2020 09:53:24 -0500 Subject: [PATCH 15/20] apply isort --- xarray/core/weighted.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 38339b2e30c..d5829089ff4 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -1,6 +1,5 @@ from typing import TYPE_CHECKING, Hashable, Iterable, Optional, Union, overload - from . import duck_array_ops from .computation import dot from .options import _get_keep_attrs From a5b4cf347384bcc1d302b491347684454d22760b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 5 Nov 2020 11:18:04 -0700 Subject: [PATCH 16/20] Update xarray/core/weighted.py --- xarray/core/weighted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index d5829089ff4..332fc3a1f55 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -117,7 +117,7 @@ def _weight_check(w): ) else: - weights.data = _weight_check(weights.data) + weights = weights.copy(data=_weight_check(weights.data)) self.obj = obj self.weights = weights From 98c4e32d6b419cf731809c3ca883ae2485aade1c Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Thu, 5 Nov 2020 12:28:39 -0700 Subject: [PATCH 17/20] Update xarray/core/weighted.py Co-authored-by: Mathias Hauser --- xarray/core/weighted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 332fc3a1f55..8e2921331c1 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -117,7 +117,7 @@ def _weight_check(w): ) else: - weights = weights.copy(data=_weight_check(weights.data)) + _weight_check(weights.data) self.obj = obj self.weights = weights From 89646080dfea5b539d3d56da5587cfe4f0ea248e Mon Sep 17 00:00:00 2001 From: jbusecke Date: Fri, 6 Nov 2020 11:11:09 -0500 Subject: [PATCH 18/20] black formatting --- xarray/core/weighted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 8e2921331c1..021a63e9a12 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -117,7 +117,7 @@ def _weight_check(w): ) else: - _weight_check(weights.data) + _weight_check(weights.data) self.obj = obj self.weights = weights From c49d312f2fb86bcad1db338e2bcb57b605d8c754 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Nov 2020 14:42:02 +0100 Subject: [PATCH 19/20] Update xarray/core/weighted.py --- xarray/core/weighted.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index 021a63e9a12..d00423b7d98 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -112,9 +112,7 @@ def _weight_check(w): return w if is_duck_dask_array(weights.data): - weights = weights.copy( - data=weights.data.map_blocks(_weight_check, dtype=weights.dtype) - ) + weights.data.map_blocks(_weight_check, dtype=weights.dtype) else: _weight_check(weights.data) From 1d59e0ff3e9b6787f5bd0e3ebc6a263d358baf05 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 9 Nov 2020 16:24:43 +0100 Subject: [PATCH 20/20] Update xarray/core/weighted.py --- xarray/core/weighted.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index d00423b7d98..ab4a0958866 100644 --- a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -112,7 +112,10 @@ def _weight_check(w): return w if is_duck_dask_array(weights.data): - weights.data.map_blocks(_weight_check, dtype=weights.dtype) + # assign to copy - else the check is not triggered + weights = weights.copy( + data=weights.data.map_blocks(_weight_check, dtype=weights.dtype) + ) else: _weight_check(weights.data)