Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dask: Data.masked_invalid, Data.mask_invalid #390

Merged
merged 7 commits into from
May 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 21 additions & 62 deletions cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -845,7 +845,7 @@ def __bool__(self):
"elements is ambiguous. Use d.any() or d.all()"
)

return bool(self._get_dask())
return bool(self.to_dask_array())

def __repr__(self):
"""Called by the `repr` built-in function.
Expand Down Expand Up @@ -5374,7 +5374,7 @@ def arctanh(self, inplace=False):
>>> d.arctanh(inplace=True)
>>> print(d.array)
[nan inf 1.0986122886681098 0.6931471805599453 --]
>>> d.mask_invalid(inplace=True)
>>> d.masked_invalid(inplace=True)
>>> print(d.array)
[-- -- 1.0986122886681098 0.6931471805599453 --]

Expand Down Expand Up @@ -5429,7 +5429,7 @@ def arcsin(self, inplace=False):
>>> d.arcsin(inplace=True)
>>> print(d.array)
[nan 1.5707963267948966 0.9272952180016123 0.6435011087932844 --]
>>> d.mask_invalid(inplace=True)
>>> d.masked_invalid(inplace=True)
>>> print(d.array)
[-- 1.5707963267948966 0.9272952180016123 0.6435011087932844 --]

Expand Down Expand Up @@ -5533,7 +5533,7 @@ def arccos(self, inplace=False):
>>> d.arccos(inplace=True)
>>> print(d.array)
[nan 0.0 0.6435011087932843 0.9272952180016123 --]
>>> d.mask_invalid(inplace=True)
>>> d.masked_invalid(inplace=True)
>>> print(d.array)
[-- 0.0 0.6435011087932843 0.9272952180016123 --]

Expand Down Expand Up @@ -5588,7 +5588,7 @@ def arccosh(self, inplace=False):
>>> d.arccosh(inplace=True)
>>> print(d.array)
[0.6223625037147786 0.0 nan nan --]
>>> d.mask_invalid(inplace=True)
>>> d.masked_invalid(inplace=True)
>>> print(d.array)
[0.6223625037147786 0.0 -- -- --]

Expand Down Expand Up @@ -5665,7 +5665,7 @@ def all(self, axis=None, keepdims=True, split_every=None):

"""
d = self.copy(array=False)
dx = self._get_dask()
dx = self.to_dask_array()
dx = da.all(dx, axis=axis, keepdims=keepdims, split_every=split_every)
d._set_dask(dx, reset_mask_hardness=False)
d.hardmask = _DEFAULT_HARDMASK
Expand Down Expand Up @@ -5782,7 +5782,7 @@ def any(self, axis=None, keepdims=True, split_every=None):

"""
d = self.copy(array=False)
dx = self._get_dask()
dx = self.to_dask_array()
dx = da.any(dx, axis=axis, keepdims=keepdims, split_every=split_every)
d._set_dask(dx, reset_mask_hardness=False)
d.hardmask = _DEFAULT_HARDMASK
Expand Down Expand Up @@ -9199,79 +9199,38 @@ def datum(self, *index):

return cf_masked

@_deprecated_kwarg_check("i")
@daskified(_DASKIFIED_VERBOSE)
@_inplace_enabled(default=False)
def mask_invalid(self, inplace=False, i=False):
def masked_invalid(self, inplace=False):
"""Mask the array where invalid values occur (NaN or inf).

Note that:

* Invalid values in the results of arithmetic operations may only
occur if the raising of `FloatingPointError` exceptions has been
suppressed by `cf.Data.seterr`.

* If the raising of `FloatingPointError` exceptions has been
allowed then it is possible for invalid values in the results of
arithmetic operations to be automatically converted to masked
values, depending on the setting of `cf.Data.mask_fpe`. In this
case, such automatic conversion might be faster than calling
`mask_invalid`.

.. seealso:: `cf.Data.mask_fpe`, `cf.Data.seterr`
.. seealso:: `where`

:Parameters:

{{inplace: `bool`, optional}}

{{i: deprecated at version 3.0.0}}

:Returns:

`Data` or `None`
The masked data, or `None` if the operation was
in-place.

**Examples**

>>> d = cf.Data([0., 1])
>>> e = cf.Data([1., 2])
>>> old = cf.Data.seterr('ignore')

>>> f = e/d
>>> d = cf.Data([0, 1, 2])
>>> e = cf.Data([0, 2, 0])
>>> f = d / e
>>> f
<CF Data: [inf, 2.0] >
>>> f.mask_invalid()
<CF Data: [--, 2.0] >

>>> f=e**12345
>>> f
<CF Data: [1.0, inf] >
>>> f.mask_invalid()
<CF Data: [1.0, --] >

>>> old = cf.Data.seterr('raise')
>>> old = cf.Data.mask_fpe(True)
>>> e/d
<CF Data: [--, 2.0] >
>>> e**12345
<CF Data: [1.0, --] >
<CF Data(3): [nan, 0.5, inf]>
>>> f.masked_invalid()
<CF Data(3): [--, 0.5, --]>

"""
d = _inplace_enabled_define_and_cleanup(self)

config = d.partition_configuration(readonly=False)

for partition in d.partitions.matrix.flat:
partition.open(config)
array = partition.array

array = np.ma.masked_invalid(array, copy=False)
array.shrink_mask()
if array.mask is np.ma.nomask:
array = array.data

partition.subarray = array

partition.close()

dx = self.to_dask_array()
dx = da.ma.masked_invalid(dx)
d._set_dask(dx, reset_mask_hardness=False)
return d

def del_calendar(self, default=ValueError()):
Expand Down
39 changes: 39 additions & 0 deletions cf/data/mixin/deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,45 @@ def mask_fpe(*arg):
"https://github.com/dask/dask/issues/3245 for more details)."
)

def mask_invalid(self, *args, **kwargs):
    """Deprecated. Mask the array where invalid values occur.

    Deprecated at version TODODASK and scheduled for removal at
    version 5.0.0. Use the method `masked_invalid` instead.

    This method no longer does any masking itself: every call is
    handed straight to the deprecation handler
    `_DEPRECATION_ERROR_METHOD` (which presumably raises an
    exception -- confirm against its definition).

    .. seealso:: `masked_invalid`, `where`

    :Parameters:

        args, kwargs: optional
            Accepted so that calls written for the old signature
            still reach the deprecation handler. They are not used.

    **Examples**

    Replace calls of the form ``f.mask_invalid()`` with the new
    method:

    >>> d = cf.Data([0, 1, 2])
    >>> e = cf.Data([0, 2, 0])
    >>> f = d / e
    >>> f
    <CF Data(3): [nan, 0.5, inf]>
    >>> f.masked_invalid()
    <CF Data(3): [--, 0.5, --]>

    """
    _DEPRECATION_ERROR_METHOD(
        self,
        "mask_invalid",
        version="TODODASK",
        removed_at="5.0.0",
        message="Use the method 'masked_invalid' instead.",
    )  # pragma: no cover

def partition_boundaries(self):
"""Return the partition boundaries for each partition matrix
dimension.
Expand Down
16 changes: 16 additions & 0 deletions cf/test/test_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3842,6 +3842,22 @@ def test_Data_tolist(self):
self.assertEqual(e, np.array(x).tolist())
self.assertTrue(d.equals(cf.Data(e)))

def test_Data_masked_invalid(self):
    """Test `Data.masked_invalid` against `numpy.ma.masked_invalid`.

    Both the copying form and the in-place form are checked: each
    must produce the same mask and the same unmasked values as
    numpy does for an array containing NaN and inf.

    """
    a = np.array([0, 1, 2])
    b = np.array([0, 2, 0])
    # 0/0 gives nan and 2/0 gives inf. numpy reports both with a
    # RuntimeWarning, which is silenced to keep the test output
    # clean.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        i = a / b

    # Reference result from numpy
    c = np.ma.masked_invalid(i)

    d = cf.Data(i, "m")

    # Not in-place: the masked data are returned as a new object
    e = d.masked_invalid().array
    self.assertTrue((e.mask == c.mask).all())
    self.assertTrue((e == c).all())

    # In-place: nothing is returned ...
    self.assertIsNone(d.masked_invalid(inplace=True))

    # ... and 'd' itself must now carry the mask. Without this
    # check an in-place call that did nothing would still pass.
    e = d.array
    self.assertTrue((e.mask == c.mask).all())
    self.assertTrue((e == c).all())

def test_Data_uncompress(self):
import cfdm

Expand Down