NCAS-CMS · davidhassell · Apr 12, 2022 · Apr 5, 2022 · Apr 12, 2022
diff --git a/cf/data/data.py b/cf/data/data.py
@@ -7250,6 +7250,7 @@ def any(self):
 
         return False
 
+    @daskified(_DASKIFIED_VERBOSE)
     @_inplace_enabled(default=False)
     def apply_masking(
         self,
@@ -7277,8 +7278,9 @@ def apply_masking(
                 elements exactly equal to any of the values are set to
                 missing data.
 
-                If True then the value returned by the `get_fill_value`
-                method, if such a value exists, is used.
+                If True then the value returned by the
+                `get_fill_value` method, if such a value exists, is
+                used.
 
                 Zero or more values may be provided in a sequence of
                 scalars.
@@ -7299,21 +7301,21 @@ def apply_masking(
                   ``fill_value=[]``
 
             valid_min: number, optional
-                A scalar specifying the minimum valid value. Data elements
-                strictly less than this number will be set to missing
-                data.
+                A scalar specifying the minimum valid value. Data
+                elements strictly less than this number will be set to
+                missing data.
 
             valid_max: number, optional
-                A scalar specifying the maximum valid value. Data elements
-                strictly greater than this number will be set to missing
-                data.
+                A scalar specifying the maximum valid value. Data
+                elements strictly greater than this number will be set
+                to missing data.
 
             valid_range: (number, number), optional
-                A vector of two numbers specifying the minimum and maximum
-                valid values, equivalent to specifying values for both
-                *valid_min* and *valid_max* parameters. The *valid_range*
-                parameter must not be set if either *valid_min* or
-                *valid_max* is defined.
+                A vector of two numbers specifying the minimum and
+                maximum valid values, equivalent to specifying values
+                for both *valid_min* and *valid_max* parameters. The
+                *valid_range* parameter must not be set if either
+                *valid_min* or *valid_max* is defined.
 
                 *Parameter example:*
                   ``valid_range=[-999, 10000]`` is equivalent to setting
@@ -7327,54 +7329,52 @@ def apply_masking(
                 The data with masked values. If the operation was in-place
                 then `None` is returned.
 
-        **Examples:**
+        **Examples**
 
         >>> import numpy
-        >>> d = Data(numpy.arange(12).reshape(3, 4), 'm')
-        >>> d[1, 1] = masked
+        >>> d = cf.Data(numpy.arange(12).reshape(3, 4), 'm')
+        >>> d[1, 1] = cf.masked
         >>> print(d.array)
-        [[0  1  2  3]
-         [4 --  6  7]
-         [8  9 10 11]]
-
+        [[0 1 2 3]
+         [4 -- 6 7]
+         [8 9 10 11]]
         >>> print(d.apply_masking().array)
-        [[0  1  2  3]
-         [4 --  6  7]
-         [8  9 10 11]]
+        [[0 1 2 3]
+         [4 -- 6 7]
+         [8 9 10 11]]
         >>> print(d.apply_masking(fill_values=[0]).array)
-        [[--  1  2  3]
-         [ 4 --  6  7]
-         [ 8  9 10 11]]
+        [[-- 1 2 3]
+         [4 -- 6 7]
+         [8 9 10 11]]
         >>> print(d.apply_masking(fill_values=[0, 11]).array)
-        [[--  1  2  3]
-         [ 4 --  6  7]
-         [ 8  9 10 --]]
-
+        [[-- 1 2 3]
+         [4 -- 6 7]
+         [8 9 10 --]]
         >>> print(d.apply_masking(valid_min=3).array)
-        [[-- -- --  3]
-         [ 4 --  6  7]
-         [ 8  9 10 11]]
+        [[-- -- -- 3]
+         [4 -- 6 7]
+         [8 9 10 11]]
         >>> print(d.apply_masking(valid_max=6).array)
-        [[ 0  1  2  3]
-         [ 4 --  6 --]
+        [[0 1 2 3]
+         [4 -- 6 --]
          [-- -- -- --]]
         >>> print(d.apply_masking(valid_range=[2, 8]).array)
-        [[-- --  2  3]
-         [ 4 --  6  7]
-         [ 8 -- -- --]]
-
+        [[-- -- 2 3]
+         [4 -- 6 7]
+         [8 -- -- --]]
         >>> d.set_fill_value(7)
         >>> print(d.apply_masking(fill_values=True).array)
-        [[0  1  2  3]
-         [4 --  6 --]
-         [8  9 10 11]]
+        [[0 1 2 3]
+         [4 -- 6 --]
+         [8 9 10 11]]
         >>> print(d.apply_masking(fill_values=True,
         ...                       valid_range=[2, 8]).array)
-        [[-- --  2  3]
-         [ 4 --  6 --]
-         [ 8 -- -- --]]
+        [[-- -- 2 3]
+         [4 -- 6 --]
+         [8 -- -- --]]
 
         """
+        # Parse valid_range
         if valid_range is not None:
             if valid_min is not None or valid_max is not None:
                 raise ValueError(
@@ -7396,8 +7396,7 @@ def apply_masking(
 
             valid_min, valid_max = valid_range
 
-        d = _inplace_enabled_define_and_cleanup(self)
-
+        # Parse fill_values
         if fill_values is None:
             fill_values = False
 
@@ -7412,45 +7411,45 @@ def apply_masking(
                 fill_values = ()
         else:
             try:
-                _ = iter(fill_values)
+                iter(fill_values)
             except TypeError:
                 raise TypeError(
                     "'fill_values' parameter must be a sequence or "
-                    "of type bool. Got type {}".format(type(fill_values))
+                    f"of type bool. Got type {type(fill_values)}"
                 )
             else:
                 if isinstance(fill_values, str):
                     raise TypeError(
                         "'fill_values' parameter must be a sequence or "
-                        "of type bool. Got type {}".format(type(fill_values))
+                        f"of type bool. Got type {type(fill_values)}"
                     )
-        # --- End: if
 
-        mask = None
+        d = _inplace_enabled_define_and_cleanup(self)
+        dx = self._get_dask()
 
+        mask = None
         if fill_values:
-            mask = d == fill_values[0]
+            mask = dx == fill_values[0]
 
             for fill_value in fill_values[1:]:
-                mask |= d == fill_value
-        # --- End: for
+                mask |= dx == fill_value
 
         if valid_min is not None:
             if mask is None:
-                mask = d < valid_min
+                mask = dx < valid_min
             else:
-                mask |= d < valid_min
-        # --- End: if
+                mask |= dx < valid_min
 
         if valid_max is not None:
             if mask is None:
-                mask = d > valid_max
+                mask = dx > valid_max
             else:
-                mask |= d > valid_max
-        # --- End: if
+                mask |= dx > valid_max
 
         if mask is not None:
-            d.where(mask, cf_masked, inplace=True)
+            dx = da.ma.masked_where(mask, dx)
+
+        d._set_dask(dx, reset_mask_hardness=True)
 
         return d
 

diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py
@@ -492,12 +492,6 @@ def test_Data_halo(self):
             d.halo(4)
 
     def test_Data_mask(self):
-        if self.test_only and inspect.stack()[0][3] not in self.test_only:
-            return
-
-        # TODODASK: once test_Data_apply_masking is passing after daskification
-        # of apply_masking, might make sense to combine this test with that?
-
         # Test for a masked Data object (having some masked points)
         a = self.ma
         d = cf.Data(a, units="m")
@@ -531,53 +525,52 @@ def test_Data_mask(self):
         self.assertTrue(d3.mask.hardmask)
         self.assertTrue(d3.mask.array[1], True)
 
-    @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'")
     def test_Data_apply_masking(self):
-        if self.test_only and inspect.stack()[0][3] not in self.test_only:
-            return
+        a = np.ma.arange(12).reshape(3, 4)
+        a[1, 1] = np.ma.masked
+        d = cf.Data(a, units="m", chunks=2)
 
-        a = self.ma
-        d = cf.Data(a, units="m")
+        self.assertIsNone(d.apply_masking(inplace=True))
 
-        b = a.copy()
+        b = a
         e = d.apply_masking()
         self.assertTrue((b == e.array).all())
         self.assertTrue((b.mask == e.mask.array).all())
 
-        b = np.ma.where(a == 0, np.ma.masked, a)
+        b = np.ma.masked_where(a == 0, a)
         e = d.apply_masking(fill_values=[0])
         self.assertTrue((b == e.array).all())
         self.assertTrue((b.mask == e.mask.array).all())
 
-        b = np.ma.where((a == 0) | (a == 11), np.ma.masked, a)
+        b = np.ma.masked_where((a == 0) | (a == 11), a)
         e = d.apply_masking(fill_values=[0, 11])
         self.assertTrue((b == e.array).all())
         self.assertTrue((b.mask == e.mask.array).all())
 
-        b = np.ma.where(a < 30, np.ma.masked, a)
-        e = d.apply_masking(valid_min=30)
+        b = np.ma.masked_where(a < 3, a)
+        e = d.apply_masking(valid_min=3)
         self.assertTrue((b == e.array).all())
         self.assertTrue((b.mask == e.mask.array).all())
 
-        b = np.ma.where(a > -60, np.ma.masked, a)
-        e = d.apply_masking(valid_max=-60)
+        b = np.ma.masked_where(a > 8, a)
+        e = d.apply_masking(valid_max=8)
         self.assertTrue((b == e.array).all())
         self.assertTrue((b.mask == e.mask.array).all())
 
-        b = np.ma.where((a < -20) | (a > 80), np.ma.masked, a)
-        e = d.apply_masking(valid_range=[-20, 80])
+        b = np.ma.masked_where((a < 2) | (a > 8), a)
+        e = d.apply_masking(valid_range=[2, 8])
         self.assertTrue((b == e.array).all())
         self.assertTrue((b.mask == e.mask.array).all())
 
-        d.set_fill_value(70)
+        d.set_fill_value(7)
 
-        b = np.ma.where(a == 70, np.ma.masked, a)
+        b = np.ma.masked_where(a == 7, a)
         e = d.apply_masking(fill_values=True)
         self.assertTrue((b == e.array).all())
         self.assertTrue((b.mask == e.mask.array).all())
 
-        b = np.ma.where((a == 70) | (a < 20) | (a > 80), np.ma.masked, a)
-        e = d.apply_masking(fill_values=True, valid_range=[20, 80])
+        b = np.ma.masked_where((a == 7) | (a < 2) | (a > 8), a)
+        e = d.apply_masking(fill_values=True, valid_range=[2, 8])
         self.assertTrue((b == e.array).all())
         self.assertTrue((b.mask == e.mask.array).all())