Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

dask: Data.apply_masking #374

Merged
merged 2 commits into from
Apr 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 61 additions & 62 deletions cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7250,6 +7250,7 @@ def any(self):

return False

@daskified(_DASKIFIED_VERBOSE)
@_inplace_enabled(default=False)
def apply_masking(
self,
Expand Down Expand Up @@ -7277,8 +7278,9 @@ def apply_masking(
elements exactly equal to any of the values are set to
missing data.

If True then the value returned by the `get_fill_value`
method, if such a value exists, is used.
If True then the value returned by the
`get_fill_value` method, if such a value exists, is
used.

Zero or more values may be provided in a sequence of
scalars.
Expand All @@ -7299,21 +7301,21 @@ def apply_masking(
``fill_value=[]``

valid_min: number, optional
A scalar specifying the minimum valid value. Data elements
strictly less than this number will be set to missing
data.
A scalar specifying the minimum valid value. Data
elements strictly less than this number will be set to
missing data.

valid_max: number, optional
A scalar specifying the maximum valid value. Data elements
strictly greater than this number will be set to missing
data.
A scalar specifying the maximum valid value. Data
elements strictly greater than this number will be set
to missing data.

valid_range: (number, number), optional
A vector of two numbers specifying the minimum and maximum
valid values, equivalent to specifying values for both
*valid_min* and *valid_max* parameters. The *valid_range*
parameter must not be set if either *valid_min* or
*valid_max* is defined.
A vector of two numbers specifying the minimum and
maximum valid values, equivalent to specifying values
for both *valid_min* and *valid_max* parameters. The
*valid_range* parameter must not be set if either
*valid_min* or *valid_max* is defined.

*Parameter example:*
``valid_range=[-999, 10000]`` is equivalent to setting
Expand All @@ -7327,54 +7329,52 @@ def apply_masking(
The data with masked values. If the operation was in-place
then `None` is returned.

**Examples:**
**Examples**

>>> import numpy
>>> d = Data(numpy.arange(12).reshape(3, 4), 'm')
>>> d[1, 1] = masked
>>> d = cf.Data(numpy.arange(12).reshape(3, 4), 'm')
>>> d[1, 1] = cf.masked
>>> print(d.array)
[[0 1 2 3]
[4 -- 6 7]
[8 9 10 11]]

[[0 1 2 3]
[4 -- 6 7]
[8 9 10 11]]
>>> print(d.apply_masking().array)
[[0 1 2 3]
[4 -- 6 7]
[8 9 10 11]]
[[0 1 2 3]
[4 -- 6 7]
[8 9 10 11]]
>>> print(d.apply_masking(fill_values=[0]).array)
[[-- 1 2 3]
[ 4 -- 6 7]
[ 8 9 10 11]]
[[-- 1 2 3]
[4 -- 6 7]
[8 9 10 11]]
>>> print(d.apply_masking(fill_values=[0, 11]).array)
[[-- 1 2 3]
[ 4 -- 6 7]
[ 8 9 10 --]]

[[-- 1 2 3]
[4 -- 6 7]
[8 9 10 --]]
>>> print(d.apply_masking(valid_min=3).array)
[[-- -- -- 3]
[ 4 -- 6 7]
[ 8 9 10 11]]
[[-- -- -- 3]
[4 -- 6 7]
[8 9 10 11]]
>>> print(d.apply_masking(valid_max=6).array)
[[ 0 1 2 3]
[ 4 -- 6 --]
[[0 1 2 3]
[4 -- 6 --]
[-- -- -- --]]
>>> print(d.apply_masking(valid_range=[2, 8]).array)
[[-- -- 2 3]
[ 4 -- 6 7]
[ 8 -- -- --]]

[[-- -- 2 3]
[4 -- 6 7]
[8 -- -- --]]
>>> d.set_fill_value(7)
>>> print(d.apply_masking(fill_values=True).array)
[[0 1 2 3]
[4 -- 6 --]
[8 9 10 11]]
[[0 1 2 3]
[4 -- 6 --]
[8 9 10 11]]
>>> print(d.apply_masking(fill_values=True,
... valid_range=[2, 8]).array)
[[-- -- 2 3]
[ 4 -- 6 --]
[ 8 -- -- --]]
[[-- -- 2 3]
[4 -- 6 --]
[8 -- -- --]]

"""
# Parse valid_range
if valid_range is not None:
if valid_min is not None or valid_max is not None:
raise ValueError(
Expand All @@ -7396,8 +7396,7 @@ def apply_masking(

valid_min, valid_max = valid_range

d = _inplace_enabled_define_and_cleanup(self)

# Parse fill_values
if fill_values is None:
fill_values = False

Expand All @@ -7412,45 +7411,45 @@ def apply_masking(
fill_values = ()
else:
try:
_ = iter(fill_values)
iter(fill_values)
except TypeError:
raise TypeError(
"'fill_values' parameter must be a sequence or "
"of type bool. Got type {}".format(type(fill_values))
f"of type bool. Got type {type(fill_values)}"
)
else:
if isinstance(fill_values, str):
raise TypeError(
"'fill_values' parameter must be a sequence or "
"of type bool. Got type {}".format(type(fill_values))
f"of type bool. Got type {type(fill_values)}"
)
# --- End: if

mask = None
d = _inplace_enabled_define_and_cleanup(self)
dx = self._get_dask()

mask = None
if fill_values:
mask = d == fill_values[0]
mask = dx == fill_values[0]

for fill_value in fill_values[1:]:
mask |= d == fill_value
# --- End: for
mask |= dx == fill_value

if valid_min is not None:
if mask is None:
mask = d < valid_min
mask = dx < valid_min
else:
mask |= d < valid_min
# --- End: if
mask |= dx < valid_min

if valid_max is not None:
if mask is None:
mask = d > valid_max
mask = dx > valid_max
else:
mask |= d > valid_max
# --- End: if
mask |= dx > valid_max

if mask is not None:
d.where(mask, cf_masked, inplace=True)
dx = da.ma.masked_where(mask, dx)

d._set_dask(dx, reset_mask_hardness=True)

return d

Expand Down
41 changes: 17 additions & 24 deletions cf/test/test_Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -492,12 +492,6 @@ def test_Data_halo(self):
d.halo(4)

def test_Data_mask(self):
if self.test_only and inspect.stack()[0][3] not in self.test_only:
return

# TODODASK: once test_Data_apply_masking is passing after daskification
# of apply_masking, might make sense to combine this test with that?

# Test for a masked Data object (having some masked points)
a = self.ma
d = cf.Data(a, units="m")
Expand Down Expand Up @@ -531,53 +525,52 @@ def test_Data_mask(self):
self.assertTrue(d3.mask.hardmask)
self.assertTrue(d3.mask.array[1], True)

@unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'")
def test_Data_apply_masking(self):
if self.test_only and inspect.stack()[0][3] not in self.test_only:
return
a = np.ma.arange(12).reshape(3, 4)
a[1, 1] = np.ma.masked
d = cf.Data(a, units="m", chunks=2)

a = self.ma
d = cf.Data(a, units="m")
self.assertIsNone(d.apply_masking(inplace=True))

b = a.copy()
b = a
e = d.apply_masking()
self.assertTrue((b == e.array).all())
self.assertTrue((b.mask == e.mask.array).all())

b = np.ma.where(a == 0, np.ma.masked, a)
b = np.ma.masked_where(a == 0, a)
e = d.apply_masking(fill_values=[0])
self.assertTrue((b == e.array).all())
self.assertTrue((b.mask == e.mask.array).all())

b = np.ma.where((a == 0) | (a == 11), np.ma.masked, a)
b = np.ma.masked_where((a == 0) | (a == 11), a)
e = d.apply_masking(fill_values=[0, 11])
self.assertTrue((b == e.array).all())
self.assertTrue((b.mask == e.mask.array).all())

b = np.ma.where(a < 30, np.ma.masked, a)
e = d.apply_masking(valid_min=30)
b = np.ma.masked_where(a < 3, a)
e = d.apply_masking(valid_min=3)
self.assertTrue((b == e.array).all())
self.assertTrue((b.mask == e.mask.array).all())

b = np.ma.where(a > -60, np.ma.masked, a)
e = d.apply_masking(valid_max=-60)
b = np.ma.masked_where(a > 8, a)
e = d.apply_masking(valid_max=8)
self.assertTrue((b == e.array).all())
self.assertTrue((b.mask == e.mask.array).all())

b = np.ma.where((a < -20) | (a > 80), np.ma.masked, a)
e = d.apply_masking(valid_range=[-20, 80])
b = np.ma.masked_where((a < 2) | (a > 8), a)
e = d.apply_masking(valid_range=[2, 8])
self.assertTrue((b == e.array).all())
self.assertTrue((b.mask == e.mask.array).all())

d.set_fill_value(70)
d.set_fill_value(7)

b = np.ma.where(a == 70, np.ma.masked, a)
b = np.ma.masked_where(a == 7, a)
e = d.apply_masking(fill_values=True)
self.assertTrue((b == e.array).all())
self.assertTrue((b.mask == e.mask.array).all())

b = np.ma.where((a == 70) | (a < 20) | (a > 80), np.ma.masked, a)
e = d.apply_masking(fill_values=True, valid_range=[20, 80])
b = np.ma.masked_where((a == 7) | (a < 2) | (a > 8), a)
e = d.apply_masking(fill_values=True, valid_range=[2, 8])
self.assertTrue((b == e.array).all())
self.assertTrue((b.mask == e.mask.array).all())

Expand Down