From c2e7872111d10c3005bc07aaae4babd498a7ed2a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 6 Apr 2022 11:37:11 +0100 Subject: [PATCH 1/5] masked_invalid --- cf/data/data.py | 66 ++++++++++---------------------------------- cf/functions.py | 3 +- cf/test/test_Data.py | 20 ++++++++++++++ 3 files changed, 35 insertions(+), 54 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index f9c2d3bb50..8c1482e48d 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -10733,25 +10733,13 @@ def datum(self, *index): return cf_masked + @daskified(_DASKIFIED_VERBOSE) @_deprecated_kwarg_check("i") @_inplace_enabled(default=False) def mask_invalid(self, inplace=False, i=False): """Mask the array where invalid values occur (NaN or inf). - Note that: - - * Invalid values in the results of arithmetic operations may only - occur if the raising of `FloatingPointError` exceptions has been - suppressed by `cf.Data.seterr`. - - * If the raising of `FloatingPointError` exceptions has been - allowed then invalid values in the results of arithmetic - operations it is possible for them to be automatically converted - to masked values, depending on the setting of - `cf.Data.mask_fpe`. In this case, such automatic conversion - might be faster than calling `mask_invalid`. - - .. seealso:: `cf.Data.mask_fpe`, `cf.Data.seterr` + .. seealso:: `where` :Parameters: @@ -10762,50 +10750,24 @@ def mask_invalid(self, inplace=False, i=False): :Returns: `Data` or `None` + The masked data, or `None` if the operation was + in-place. - **Examples:** - - >>> d = cf.Data([0., 1]) - >>> e = cf.Data([1., 2]) - >>> old = cf.Data.seterr('ignore') - - >>> f = e/d - >>> f - - >>> f.mask_invalid() - + **Examples** - >>> f=e**12345 + >>> d = cf.Data([0, 1, 2]) + >>> e = cf.Data([0, 2, 0]) + >>> f = d / e >>> f - - >>> f.mask_invalid() - - - >>> old = cf.Data.seterr('raise') - >>> old = cf.Data.mask_fpe(True) - >>> e/d - - >>> e**12345 - + + >>> f.masked_invalid() + """ d = _inplace_enabled_define_and_cleanup(self) - - config = d.partition_configuration(readonly=False) - - for partition in d.partitions.matrix.flat: - partition.open(config) - array = partition.array - - array = np.ma.masked_invalid(array, copy=False) - array.shrink_mask() - if array.mask is np.ma.nomask: - array = array.data - - partition.subarray = array - - partition.close() - + dx = self._get_dask() + dx = da.ma.masked_invalid(dx) + d.set_dask(dx, reset_mask_hardness=False) return d def del_calendar(self, default=ValueError()): diff --git a/cf/functions.py b/cf/functions.py index ebb12c20fa..4c8355a38f 100644 --- a/cf/functions.py +++ b/cf/functions.py @@ -13,7 +13,6 @@ from collections.abc import Iterable from itertools import product from marshal import dumps -from math import ceil as math_ceil from numbers import Integral from os import getpid, listdir, mkdir from os.path import abspath as _os_path_abspath @@ -25,9 +24,9 @@ import cfdm import netCDF4 +import numpy as np from dask import config from dask.utils import parse_bytes -import numpy as np from numpy import __file__ as _numpy__file__ from numpy import __version__ as _numpy__version__ from numpy import all as _numpy_all diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 9e6a610ab8..e803566d28 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -3938,6 +3938,26 @@ def test_Data_tolist(self): self.assertEqual(e, np.array(x).tolist()) self.assertTrue(d.equals(cf.Data(e))) + @unittest.skipIf(TEST_DASKIFIED_ONLY, "Needs __div__") + def test_Data_masked_invalid(self): + a = np.array([0, 1, 2]) + b = np.array([0, 2, 0]) + + d = cf.Data(a, "m") + e = cf.Data(b, "m") + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", category=RuntimeWarning) + c = np.ma.masked_invalid(a / b) + f = (d / e).masked_invalid().array + + self.assertTrue((f.mask == c.mask).all()) + self.assertTrue((f == c).all()) + + d0 = d.copy() + self.assertIsNone(d.masked_invalid(inplace=True)) + self.assertTrue(d.equals(d0)) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) From d6217b4d4e7e8999ffdcaa26dfed9ee3c828f14a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Thu, 21 Apr 2022 11:00:04 +0100 Subject: [PATCH 2/5] linting --- cf/data/data.py | 1 - cf/test/test_Data.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index 0c15cb132a..21c22d100c 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -34,7 +34,6 @@ abspath, ) from ..functions import atol as cf_atol -from ..functions import chunksize as cf_chunksize from ..functions import default_netCDF_fillvals from ..functions import fm_threshold as cf_fm_threshold from ..functions import free_memory diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 94bb864d17..b69ada95d4 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -3801,11 +3801,11 @@ def test_Data_masked_invalid(self): with warnings.catch_warnings(): warnings.simplefilter("ignore", category=RuntimeWarning) i = a / b - + d = cf.Data(i, "m") e = d.masked_invalid().array c = np.ma.masked_invalid(i) - + self.assertTrue((e.mask == c.mask).all()) self.assertTrue((e == c).all()) From dadcd784de35922d895fe00984f0c94205ad254e Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 29 Apr 2022 08:50:09 +0100 Subject: [PATCH 3/5] _get_dask -> to_dask_array --- cf/data/data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index ab5b5bc640..eb22f7ec7d 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -845,7 +845,7 @@ def __bool__(self): "elements is ambiguous. Use d.any() or d.all()" ) - return bool(self._get_dask()) + return bool(self.to_dask_array()) def __repr__(self): """Called by the `repr` built-in function. @@ -5665,7 +5665,7 @@ def all(self, axis=None, keepdims=True, split_every=None): """ d = self.copy(array=False) - dx = self._get_dask() + dx = self.to_dask_array() dx = da.all(dx, axis=axis, keepdims=keepdims, split_every=split_every) d._set_dask(dx, reset_mask_hardness=False) d.hardmask = _DEFAULT_HARDMASK @@ -5782,7 +5782,7 @@ def any(self, axis=None, keepdims=True, split_every=None): """ d = self.copy(array=False) - dx = self._get_dask() + dx = self.to_dask_array() dx = da.any(dx, axis=axis, keepdims=keepdims, split_every=split_every) d._set_dask(dx, reset_mask_hardness=False) d.hardmask = _DEFAULT_HARDMASK @@ -9228,7 +9228,7 @@ def masked_invalid(self, inplace=False): """ d = _inplace_enabled_define_and_cleanup(self) - dx = self._get_dask() + dx = self.to_dask_array() dx = da.ma.masked_invalid(dx) d._set_dask(dx, reset_mask_hardness=False) return d From 73c606727b702927c1aba52dc30351d138a61763 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 3 May 2022 08:51:45 +0100 Subject: [PATCH 4/5] Typo Co-authored-by: Sadie L. Bartholomew --- cf/data/mixin/deprecations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cf/data/mixin/deprecations.py b/cf/data/mixin/deprecations.py index d42dc1d67d..0e3491a6db 100644 --- a/cf/data/mixin/deprecations.py +++ b/cf/data/mixin/deprecations.py @@ -628,7 +628,7 @@ def mask_fpe(*arg): def mask_invalid(self, *args, **kwargs): """Mask the array where invalid values occur (NaN or inf). - Deprecated at veriosn TODODASK. Use the method + Deprecated at version TODODASK. Use the method `masked_invalid` instead. .. seealso:: `where` From 98d8eb5570f6457bd027e2b15e4b5a19bf076fa5 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 4 May 2022 14:33:03 +0100 Subject: [PATCH 5/5] masked_invalid in docstrings --- cf/data/data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index eb22f7ec7d..2cca7f7ed7 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -5374,7 +5374,7 @@ def arctanh(self, inplace=False): >>> d.arctanh(inplace=True) >>> print(d.array) [nan inf 1.0986122886681098 0.6931471805599453 --] - >>> d.mask_invalid(inplace=True) + >>> d.masked_invalid(inplace=True) >>> print(d.array) [-- -- 1.0986122886681098 0.6931471805599453 --] @@ -5429,7 +5429,7 @@ def arcsin(self, inplace=False): >>> d.arcsin(inplace=True) >>> print(d.array) [nan 1.5707963267948966 0.9272952180016123 0.6435011087932844 --] - >>> d.mask_invalid(inplace=True) + >>> d.masked_invalid(inplace=True) >>> print(d.array) [-- 1.5707963267948966 0.9272952180016123 0.6435011087932844 --] @@ -5533,7 +5533,7 @@ def arccos(self, inplace=False): >>> d.arccos(inplace=True) >>> print(d.array) [nan 0.0 0.6435011087932843 0.9272952180016123 --] - >>> d.mask_invalid(inplace=True) + >>> d.masked_invalid(inplace=True) >>> print(d.array) [-- 0.0 0.6435011087932843 0.9272952180016123 --] @@ -5588,7 +5588,7 @@ def arccosh(self, inplace=False): >>> d.arccosh(inplace=True) >>> print(d.array) [0.6223625037147786 0.0 nan nan --] - >>> d.mask_invalid(inplace=True) + >>> d.masked_invalid(inplace=True) >>> print(d.array) [0.6223625037147786 0.0 -- -- --]