Handle fill value on netCDF save #2747

Merged · 18 commits · Oct 11, 2017
214 changes: 125 additions & 89 deletions lib/iris/fileformats/netcdf.py
@@ -741,6 +741,26 @@ def _setncattr(variable, name, attribute):
    return variable.setncattr(name, attribute)


class _FillValueMaskCheckAndStoreTarget(object):
    """
    To be used with da.store. Remembers whether any element was equal to a
    given value and whether it was masked, before passing the chunk to the
    given target.

    """
    def __init__(self, target, fill_value=None):
        self.target = target
        self.fill_value = fill_value
        self.contains_value = False
        self.is_masked = False

    def __setitem__(self, keys, arr):
        if self.fill_value is not None:
            self.contains_value |= self.fill_value in arr
        self.is_masked |= ma.isMaskedArray(arr)
Member: Would make sense to short-circuit this check (and the `fill_value in` check). Once we have found them to be true, we can save ourselves the effort of inspecting the arrays each time.

Contributor Author: I had assumed `|=` would short-circuit, but I guess not.

Member: You're going to need to double check that for me now. 😄

Contributor Author: Yeah, I had already checked. It's because it's bitwise, and in fact it changes the data type of the left-hand side.

Member: Ok. Well, at least you can get away with the boolean operators...

>>> def foo(val):
...     print('called')
...     return val
...
>>> False or foo(False)
called
False
>>> True or foo(False)
True

        self.target[keys] = arr
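A sketch of the short-circuited form discussed in the thread above (illustrative only, not the merged code): since `|=` always evaluates its right-hand side, guarding each check explicitly means chunks are no longer inspected once a flag is already True.

    def __setitem__(self, keys, arr):
        # Once a flag is True there is no need to inspect further chunks.
        if self.fill_value is not None and not self.contains_value:
            self.contains_value = self.fill_value in arr
        if not self.is_masked:
            self.is_masked = ma.isMaskedArray(arr)
        self.target[keys] = arr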


class Saver(object):
"""A manager for saving netcdf files."""

@@ -812,7 +832,7 @@ def __exit__(self, type, value, traceback):
    def write(self, cube, local_keys=None, unlimited_dimensions=None,
              zlib=False, complevel=4, shuffle=True, fletcher32=False,
              contiguous=False, chunksizes=None, endian='native',
              least_significant_digit=None, packing=None):
              least_significant_digit=None, packing=None, fill_value=None):
"""
Wrapper for saving cubes to a NetCDF file.

Expand Down Expand Up @@ -899,14 +919,20 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None,
http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values
If this argument is a type (or type string), appropriate values of
scale_factor and add_offset will be automatically calculated based
on `cube.data` and possible masking. For masked data, `fill_value`
is taken from netCDF4.default_fillvals. For more control, pass a
dict with one or more of the following keys: `dtype` (required),
`scale_factor`, `add_offset`, and `fill_value`. Note that automatic
Member: Need to ensure this public behaviour change is well documented in the what's new.

Member: Agree with @pelson: this needs a whatsnew entry.

calculation of packing parameters will trigger loading of lazy
data; set them manually using a dict to avoid this. The default is
`None`, in which case the datatype is determined from the cube and
no packing will occur.
on `cube.data` and possible masking. For more control, pass a dict
with one or more of the following keys: `dtype` (required),
`scale_factor` and `add_offset`. Note that automatic calculation of
Member: Isn't this the reference you were looking for, saying that automatic packing realises the data?

packing parameters will trigger loading of lazy data; set them
manually using a dict to avoid this. The default is `None`, in
which case the datatype is determined from the cube and no packing
will occur.

* fill_value:
The value to use for the `_FillValue` attribute on the netCDF
variable. If `packing` is specified the value of `fill_value`
should be in the domain of the packed data. If this argument is not
supplied and the data is masked, `fill_value` is taken from
netCDF4.default_fillvals.

Returns:
None.
@@ -955,7 +981,8 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None,
            cube, dimension_names, local_keys, zlib=zlib, complevel=complevel,
            shuffle=shuffle, fletcher32=fletcher32, contiguous=contiguous,
            chunksizes=chunksizes, endian=endian,
            least_significant_digit=least_significant_digit, packing=packing)
            least_significant_digit=least_significant_digit, packing=packing,
            fill_value=fill_value)

        # Add coordinate variables.
        self._add_dim_coords(cube, dimension_names)
@@ -1840,7 +1867,7 @@ def add_ellipsoid(ellipsoid):
_setncattr(cf_var_cube, 'grid_mapping', cs.grid_mapping_name)

    def _create_cf_data_variable(self, cube, dimension_names, local_keys=None,
                                 **kwargs):
                                 packing=None, fill_value=None, **kwargs):
"""
Create CF-netCDF data variable for the cube and any associated grid
mapping.
@@ -1855,8 +1882,11 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None,
Kwargs:

* local_keys (iterable of strings):
An iterable of cube attribute keys. Any cube attributes
with matching keys will become attributes on the data variable.
* see :func:`iris.fileformats.netcdf.Saver.write`
* packing (type or string or dict or list):
* see :func:`iris.fileformats.netcdf.Saver.write`
* fill_value:
* see :func:`iris.fileformats.netcdf.Saver.write`

All other keywords are passed through to the dataset's `createVariable`
method.
@@ -1865,47 +1895,32 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None,
The newly created CF-netCDF data variable.

"""
        if 'packing' in kwargs:
            packing = kwargs.pop('packing')
            if packing:
                scale_factor = None
                add_offset = None
                fill_value = None
                if isinstance(packing, dict):
                    if 'dtype' not in packing:
                        msg = "The dtype attribute is required for packing."
                        raise ValueError(msg)
                    dtype = np.dtype(packing['dtype'])
                    if 'scale_factor' in packing:
                        scale_factor = packing['scale_factor']
                    if 'add_offset' in packing:
                        add_offset = packing['add_offset']
                    if 'fill_value' in packing:
                        fill_value = packing['fill_value']

        if packing:
            if isinstance(packing, dict):
                if 'dtype' not in packing:
                    msg = "The dtype attribute is required for packing."
                    raise ValueError(msg)
                dtype = np.dtype(packing['dtype'])
                scale_factor = packing.get('scale_factor', None)
                add_offset = packing.get('add_offset', None)
Member: Not your code, but would you mind checking that there aren't other keys in there other than the ones handled?
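A minimal sketch of the check being asked for (hypothetical; not part of this diff):

                unexpected = set(packing) - {'dtype', 'scale_factor',
                                             'add_offset'}
                if unexpected:
                    msg = 'Invalid packing key(s): {}'
                    raise ValueError(msg.format(', '.join(sorted(unexpected))))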

            else:
                masked = ma.isMaskedArray(cube.data)
Member: This would trigger the data to be loaded. Is that what you intended?

Member: Sorry, not your code... Perhaps you wouldn't mind adding a comment though:

# We compute the scale_factor based on the min/max of the data. This requires the data to be loaded.

Contributor Author (@djkirkham, Aug 25, 2017):

> This would trigger the data to be loaded. Is that what you intended?

I hadn't noticed this; it looks like it comes from the old code. It can be fixed, but it would require getting the min and max of the data while storing it. I can do that if you like, but it might be better to do it as a separate PR.

Member: No, just adding the comment is good.

Member: Doesn't the docstring at this comment cover it?

Member: Not really. It is about making the code readable. Doesn't need to be an essay.

                dtype = np.dtype(packing)
                cmax = cube.data.max()
                cmin = cube.data.min()
                n = dtype.itemsize * 8
                if masked:
                    scale_factor = (cmax - cmin)/(2**n-2)
                else:
                    scale_factor = (cmax - cmin)/(2**n-1)
                if dtype.kind == 'u':
                    add_offset = cmin
                elif dtype.kind == 'i':
                    if masked:
                        add_offset = (cmax + cmin)/2
                    else:
                        add_offset = cmin + 2**(n-1)*scale_factor
                needs_fill_value = (cube.has_lazy_data() or
                                    ma.isMaskedArray(cube.data))
                if needs_fill_value and fill_value is None:
                    dtstr = dtype.str[1:]
                    fill_value = netCDF4.default_fillvals[dtstr]
            else:
                packing = None
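To sanity-check the formulas above, a worked sketch for unmasked data packed to int16 (the data values here are invented for illustration):

import numpy as np

data = np.linspace(250.0, 320.0, num=1000)     # hypothetical payload
dtype = np.dtype('int16')                      # kind 'i', unmasked branch
n = dtype.itemsize * 8                         # 16 bits
cmin, cmax = data.min(), data.max()
scale_factor = (cmax - cmin) / (2 ** n - 1)
add_offset = cmin + 2 ** (n - 1) * scale_factor
packed = np.round((data - add_offset) / scale_factor).astype(dtype)
unpacked = packed * scale_factor + add_offset
# Round-trip error is bounded by half a packing step.
assert np.allclose(unpacked, data, atol=scale_factor)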

        def set_packing_ncattrs(cfvar):
            """Set netCDF packing attributes."""
@@ -1924,36 +1939,53 @@ def set_packing_ncattrs(cfvar):
self._dataset.file_format in ('NETCDF3_CLASSIC',
Member: Do we still need the has_lazy_data here? It'd be nice to remove.

Contributor Author: It determines which store method gets defined, although I could move the if into that method if you like.

'NETCDF3_64BIT')):

            if packing is None:
                # Determine whether there is a cube MDI value.
                fill_value = get_fill_value(cube.core_data())

            # Get the values in a form which is valid for the file format.
            data = self._ensure_valid_dtype(cube.data, 'cube', cube)

            if packing is None:
                dtype = data.dtype.newbyteorder('=')

            # Create the cube CF-netCDF data variable with data payload.
            cf_var = self._dataset.createVariable(
                cf_name, dtype, dimension_names,
                fill_value=fill_value, **kwargs)
            set_packing_ncattrs(cf_var)
            cf_var[:] = data

            def store(data, cf_var, fill_value):
                cf_var[:] = data
                is_masked = ma.isMaskedArray(data)
Member: Here too we should be using `ma.is_masked` instead of `ma.isMaskedArray`. As stated here, it should "make no difference between masked + non-masked arrays".
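For reference, the distinction being drawn (an illustrative session):

>>> import numpy.ma as ma
>>> arr = ma.masked_array([1, 2], mask=[False, False])
>>> ma.isMaskedArray(arr)   # True for any MaskedArray instance
True
>>> ma.is_masked(arr)       # True only if something is actually masked
False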

                contains_value = fill_value is not None and fill_value in data
                return is_masked, contains_value
        else:
            # Create the cube CF-netCDF data variable.
            if packing is None:
                fill_value = get_fill_value(cube.core_data())
                dtype = cube.dtype.newbyteorder('=')

            cf_var = self._dataset.createVariable(
                cf_name, dtype,
                dimension_names, fill_value=fill_value,
                **kwargs)
            set_packing_ncattrs(cf_var)

            da.store([cube.lazy_data()], [cf_var])
            data = cube.lazy_data()

            def store(data, cf_var, fill_value):
                # Store lazy data and check whether it is masked and
                # contains the fill value.
                target = _FillValueMaskCheckAndStoreTarget(cf_var, fill_value)
                da.store([data], [target])
                return target.is_masked, target.contains_value
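As a hedged usage sketch of the wrapper target outside the saver (a plain NumPy array stands in for the real netCDF variable, and the class above is assumed to be in scope):

import dask.array as da
import numpy as np
import numpy.ma as ma

lazy = da.arange(10, chunks=5)
lazy = lazy.map_blocks(lambda x: ma.masked_less(x, 3))   # masked chunks
sink = np.zeros(10)                                      # stand-in for cf_var
target = _FillValueMaskCheckAndStoreTarget(sink, fill_value=7)
da.store([lazy], [target])
print(target.is_masked, target.contains_value)           # True True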

        if not packing:
            dtype = data.dtype.newbyteorder('=')

        if fill_value is None:
            fill_value = netCDF4.default_fillvals[dtype.str[1:]]
        # Ensure it is the correct dtype.
        fill_value = dtype.type(fill_value)

        # Create the cube CF-netCDF data variable with data payload.
        cf_var = self._dataset.createVariable(cf_name, dtype, dimension_names,
                                              fill_value=fill_value,
                                              **kwargs)
        set_packing_ncattrs(cf_var)

        # If packing attributes are specified, don't bother checking whether
        # the fill value is in the data.
        fill_value_to_check = None if packing else fill_value

        # Store the data and check if it is masked and contains the fill
        # value.
        is_masked, contains_fill_value = store(data, cf_var,
                                               fill_value_to_check)

        if is_masked:
            if contains_fill_value:
                warnings.warn("Cube '{}' contains fill value {}. Some data "
                              "points will be masked.".format(cube.name(),
                                                              fill_value))
        else:
            cf_var.delncattr('_FillValue')
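The net effect on the file can be verified after a save (a hedged sketch; the filename and variable name are hypothetical):

import netCDF4

with netCDF4.Dataset('out.nc') as ds:
    var = ds.variables['air_temperature']
    # Unmasked data: the branch above removes _FillValue entirely.
    print(getattr(var, '_FillValue', 'absent'))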

        if cube.standard_name:
            _setncattr(cf_var, 'standard_name', cube.standard_name)
@@ -2040,7 +2072,7 @@ def _increment_name(self, varname):
def save(cube, filename, netcdf_format='NETCDF4', local_keys=None,
         unlimited_dimensions=None, zlib=False, complevel=4, shuffle=True,
         fletcher32=False, contiguous=False, chunksizes=None, endian='native',
         least_significant_digit=None, packing=None):
         least_significant_digit=None, packing=None, fill_value=None):
"""
Save cube(s) to a netCDF file, given the cube and the filename.

@@ -2144,16 +2176,19 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None,
This provides support for netCDF data packing as described in
http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values
If this argument is a type (or type string), appropriate values of
scale_factor and add_offset will be automatically calculated based on
`cube.data` and possible masking. For masked data, `fill_value` is
taken from netCDF4.default_fillvals. For more control, pass a dict with
one or more of the following keys: `dtype` (required), `scale_factor`,
`add_offset`, and `fill_value`. To save multiple cubes with different
packing parameters, pass an iterable of types, strings, dicts, or
`None`, one for each cube. Note that automatic calculation of packing
parameters will trigger loading of lazy data; set them manually using a
dict to avoid this. The default is `None`, in which case the datatype
is determined from the cube and no packing will occur.
scale_factor and add_offset will be automatically calculated based
on `cube.data` and possible masking. For more control, pass a dict with
one or more of the following keys: `dtype` (required), `scale_factor`
and `add_offset`. Note that automatic calculation of packing parameters
will trigger loading of lazy data; set them manually using a dict to
avoid this. The default is `None`, in which case the datatype is
determined from the cube and no packing will occur.

* fill_value:
The value to use for the `_FillValue` attribute on the netCDF variable.
If `packing` is specified the value of `fill_value` should be in the
domain of the packed data. If this argument is not supplied and the
data is masked, `fill_value` is taken from netCDF4.default_fillvals.
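A hedged usage sketch of the new keyword at the save level (the cube and all numbers are invented for illustration):

import iris

# Explicit fill value for (possibly masked) data:
iris.save(cube, 'out.nc', fill_value=-999.0)

# With packing, fill_value must lie in the domain of the *packed* data:
iris.save(cube, 'packed.nc',
          packing={'dtype': 'int16', 'scale_factor': 0.01,
                   'add_offset': 273.15},
          fill_value=-32767)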

Returns:
None.
@@ -2250,7 +2285,8 @@ def is_valid_packspec(p):
    for cube, packspec in zip(cubes, packspecs):
        sman.write(cube, local_keys, unlimited_dimensions, zlib, complevel,
                   shuffle, fletcher32, contiguous, chunksizes, endian,
                   least_significant_digit, packing=packspec)
                   least_significant_digit, packing=packspec,
                   fill_value=fill_value)

if iris.config.netcdf.conventions_override:
Member: My Eclipse says this needs an extra import to be truly righteous.

# Set to the default if custom conventions are not available.
3 changes: 2 additions & 1 deletion lib/iris/tests/unit/fileformats/netcdf/test_Saver.py
@@ -158,12 +158,13 @@ def test_big_endian(self):
    def test_zlib(self):
        cube = self._simple_cube('>f4')
        with mock.patch('iris.fileformats.netcdf.netCDF4') as api:
            api.default_fillvals = {'f4': 12345.}
Contributor Author: I've had to modify this test because the createVariable call now passes the default fill value for the data type. Since the netCDF4 module is mocked out, it turns out that the value passed for the fill_value argument is a numpy array. I'm guessing that this doesn't play well with mock.ANY (it probably tries to do an equality test somewhere along the line, which causes numpy to raise an exception), because the test fails unless I make sure there is an actual value passed by assigning this dict.
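For context on the guess above: `==` on a multi-element ndarray is element-wise, and anything that then truth-tests the result (as mock's call matching can) raises (an illustrative session):

>>> import numpy as np
>>> bool(np.array([1., 2.]) == np.array([1., 2.]))
Traceback (most recent call last):
  ...
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()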

            with Saver('/dummy/path', 'NETCDF4') as saver:
                saver.write(cube, zlib=True)
            dataset = api.Dataset.return_value
            create_var_calls = mock.call.createVariable(
                'air_pressure_anomaly', np.dtype('float32'), ['dim0', 'dim1'],
                fill_value=None, shuffle=True, least_significant_digit=None,
                fill_value=mock.ANY, shuffle=True, least_significant_digit=None,
Contributor Author: Unfortunately I've had to make this test less strict. I could check that there is a delncattr call to remove the _FillValue...

                contiguous=False, zlib=True, fletcher32=False,
                endian='native', complevel=4, chunksizes=None).call_list()
            dataset.assert_has_calls(create_var_calls)