diff --git a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt new file mode 100644 index 0000000000..d53450d50a --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_fill_value_save_arg.txt @@ -0,0 +1 @@ +* When saving a cube or list of cubes in NetCDF format, a fill value or list of fill values can be specified via a new `fill_value` argument. If a list is supplied, each fill value will be applied to each cube in turn. If a `fill_value` argument is not specified, the default fill value for the file format and the cube's data type will be used. Fill values are no longer taken from the cube's `data` attribute when it is a masked array. diff --git a/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt new file mode 100644 index 0000000000..c8c9f18584 --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_2.0/incompatiblechange_2017-Aug-29_netCDF_packing_changes.txt @@ -0,0 +1,3 @@ +* A 'fill_value' key can no longer be specified as part of the `packing` argument to `iris.save` when saving in netCDF format. Instead, a fill value or list of fill values should be specified as a separate `fill_value` argument if required. + +* If the `packing` argument to `iris.save` is a dictionary, an error is raised if it contains any keys other than 'dtype', 'scale_factor' and 'add_offset'. diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 0da1c4c785..b413d8752a 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -131,39 +131,6 @@ def as_concrete_data(data): return data -def get_fill_value(data): - """ - Return the fill value of a concrete or lazy masked array. 
- - Args: - - * data: - A dask array, NumPy `ndarray` or masked array. - - Returns: - The fill value of `data` if `data` represents a masked array, or None. - - """ - # If lazy, get the smallest slice of the data from which we can retrieve - # the fill_value. - if is_lazy_data(data): - inds = [0] * data.ndim - if inds: - inds[-1] = slice(0, 1) - data = data[tuple(inds)] - data = as_concrete_data(data) - else: - if isinstance(data, ma.core.MaskedConstant): - data = ma.array(data.data, mask=data.mask) - - # Now get the fill value. - fill_value = None - if ma.isMaskedArray(data): - fill_value = data.fill_value - - return fill_value - - def multidim_lazy_stack(stack): """ Recursively build a multidimensional stacked dask array. diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index a2d87d2eed..76d44c90d6 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -48,6 +48,7 @@ from iris.aux_factory import HybridHeightFactory, HybridPressureFactory, \ OceanSigmaZFactory, OceanSigmaFactory, OceanSFactory, OceanSg1Factory, \ OceanSg2Factory +import iris.config import iris.coord_systems import iris.coords import iris.cube @@ -56,7 +57,7 @@ import iris.fileformats._pyke_rules import iris.io import iris.util -from iris._lazy_data import as_lazy_data, get_fill_value +from iris._lazy_data import as_lazy_data # Show Pyke inference engine statistics. DEBUG = False @@ -741,6 +742,26 @@ def _setncattr(variable, name, attribute): return variable.setncattr(name, attribute) +class _FillValueMaskCheckAndStoreTarget(object): + """ + To be used with da.store. Remembers whether any element was equal to a + given value and whether it was masked, before passing the chunk to the + given target. 
+ + """ + def __init__(self, target, fill_value=None): + self.target = target + self.fill_value = fill_value + self.contains_value = False + self.is_masked = False + + def __setitem__(self, keys, arr): + if self.fill_value is not None: + self.contains_value = self.contains_value or self.fill_value in arr + self.is_masked = self.is_masked or ma.is_masked(arr) + self.target[keys] = arr + + class Saver(object): """A manager for saving netcdf files.""" @@ -812,7 +833,7 @@ def __exit__(self, type, value, traceback): def write(self, cube, local_keys=None, unlimited_dimensions=None, zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', - least_significant_digit=None, packing=None): + least_significant_digit=None, packing=None, fill_value=None): """ Wrapper for saving cubes to a NetCDF file. @@ -899,14 +920,18 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values If this argument is a type (or type string), appropriate values of scale_factor and add_offset will be automatically calculated based - on `cube.data` and possible masking. For masked data, `fill_value` - is taken from netCDF4.default_fillvals. For more control, pass a - dict with one or more of the following keys: `dtype` (required), - `scale_factor`, `add_offset`, and `fill_value`. Note that automatic - calculation of packing parameters will trigger loading of lazy - data; set them manually using a dict to avoid this. The default is - `None`, in which case the datatype is determined from the cube and - no packing will occur. + on `cube.data` and possible masking. For more control, pass a dict + with one or more of the following keys: `dtype` (required), + `scale_factor` and `add_offset`. Note that automatic calculation of + packing parameters will trigger loading of lazy data; set them + manually using a dict to avoid this. 
The default is `None`, in + which case the datatype is determined from the cube and no packing + will occur. + + * fill_value: + The value to use for the `_FillValue` attribute on the netCDF + variable. If `packing` is specified the value of `fill_value` + should be in the domain of the packed data. Returns: None. @@ -955,7 +980,8 @@ def write(self, cube, local_keys=None, unlimited_dimensions=None, cube, dimension_names, local_keys, zlib=zlib, complevel=complevel, shuffle=shuffle, fletcher32=fletcher32, contiguous=contiguous, chunksizes=chunksizes, endian=endian, - least_significant_digit=least_significant_digit, packing=packing) + least_significant_digit=least_significant_digit, packing=packing, + fill_value=fill_value) # Add coordinate variables. self._add_dim_coords(cube, dimension_names) @@ -1840,7 +1866,7 @@ def add_ellipsoid(ellipsoid): _setncattr(cf_var_cube, 'grid_mapping', cs.grid_mapping_name) def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, - **kwargs): + packing=None, fill_value=None, **kwargs): """ Create CF-netCDF data variable for the cube and any associated grid mapping. @@ -1855,8 +1881,11 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, Kwargs: * local_keys (iterable of strings): - An interable of cube attribute keys. Any cube attributes - with matching keys will become attributes on the data variable. + * see :func:`iris.fileformats.netcdf.Saver.write` + * packing (type or string or dict or list): + * see :func:`iris.fileformats.netcdf.Saver.write` + * fill_value: + * see :func:`iris.fileformats.netcdf.Saver.write` All other keywords are passed through to the dataset's `createVariable` method. @@ -1865,47 +1894,41 @@ def _create_cf_data_variable(self, cube, dimension_names, local_keys=None, The newly created CF-netCDF data variable. 
""" - if 'packing' in kwargs: - packing = kwargs.pop('packing') - if packing: - scale_factor = None - add_offset = None - fill_value = None - if isinstance(packing, dict): - if 'dtype' not in packing: - msg = "The dtype attribute is required for packing." - raise ValueError(msg) - dtype = np.dtype(packing['dtype']) - if 'scale_factor' in packing: - scale_factor = packing['scale_factor'] - if 'add_offset' in packing: - add_offset = packing['add_offset'] - if 'fill_value' in packing: - fill_value = packing['fill_value'] + + if packing: + if isinstance(packing, dict): + if 'dtype' not in packing: + msg = "The dtype attribute is required for packing." + raise ValueError(msg) + dtype = np.dtype(packing['dtype']) + scale_factor = packing.get('scale_factor', None) + add_offset = packing.get('add_offset', None) + valid_keys = {'dtype', 'scale_factor', 'add_offset'} + invalid_keys = set(packing.keys()) - valid_keys + if invalid_keys: + msg = ("Invalid packing key(s) found: '{}'. The valid " + "keys are '{}'.".format("', '".join(invalid_keys), + "', '".join(valid_keys))) + raise ValueError(msg) + else: + # We compute the scale_factor and add_offset based on the + # min/max of the data. This requires the data to be loaded. 
+ masked = ma.isMaskedArray(cube.data) + dtype = np.dtype(packing) + cmax = cube.data.max() + cmin = cube.data.min() + n = dtype.itemsize * 8 + if masked: + scale_factor = (cmax - cmin)/(2**n-2) else: - masked = ma.isMaskedArray(cube.data) - dtype = np.dtype(packing) - cmax = cube.data.max() - cmin = cube.data.min() - n = dtype.itemsize * 8 + scale_factor = (cmax - cmin)/(2**n-1) + if dtype.kind == 'u': + add_offset = cmin + elif dtype.kind == 'i': if masked: - scale_factor = (cmax - cmin)/(2**n-2) + add_offset = (cmax + cmin)/2 else: - scale_factor = (cmax - cmin)/(2**n-1) - if dtype.kind == 'u': - add_offset = cmin - elif dtype.kind == 'i': - if masked: - add_offset = (cmax + cmin)/2 - else: - add_offset = cmin + 2**(n-1)*scale_factor - needs_fill_value = (cube.has_lazy_data() or - ma.isMaskedArray(cube.data)) - if needs_fill_value and fill_value is None: - dtstr = dtype.str[1:] - fill_value = netCDF4.default_fillvals[dtstr] - else: - packing = None + add_offset = cmin + 2**(n-1)*scale_factor def set_packing_ncattrs(cfvar): """Set netCDF packing attributes.""" @@ -1924,36 +1947,58 @@ def set_packing_ncattrs(cfvar): self._dataset.file_format in ('NETCDF3_CLASSIC', 'NETCDF3_64BIT')): - if packing is None: - # Determine whether there is a cube MDI value. - fill_value = get_fill_value(cube.core_data()) - # Get the values in a form which is valid for the file format. data = self._ensure_valid_dtype(cube.data, 'cube', cube) - if packing is None: - dtype = data.dtype.newbyteorder('=') - - # Create the cube CF-netCDF data variable with data payload. - cf_var = self._dataset.createVariable( - cf_name, dtype, dimension_names, - fill_value=fill_value, **kwargs) - set_packing_ncattrs(cf_var) - cf_var[:] = data - + def store(data, cf_var, fill_value): + cf_var[:] = data + is_masked = ma.is_masked(data) + contains_value = fill_value is not None and fill_value in data + return is_masked, contains_value else: - # Create the cube CF-netCDF data variable. 
- if packing is None: - fill_value = get_fill_value(cube.core_data()) - dtype = cube.dtype.newbyteorder('=') - - cf_var = self._dataset.createVariable( - cf_name, dtype, - dimension_names, fill_value=fill_value, - **kwargs) - set_packing_ncattrs(cf_var) - - da.store([cube.lazy_data()], [cf_var]) + data = cube.lazy_data() + + def store(data, cf_var, fill_value): + # Store lazy data and check whether it is masked and contains + # the fill value + target = _FillValueMaskCheckAndStoreTarget(cf_var, fill_value) + da.store([data], [target]) + return target.is_masked, target.contains_value + + if not packing: + dtype = data.dtype.newbyteorder('=') + + # Create the cube CF-netCDF data variable with data payload. + cf_var = self._dataset.createVariable(cf_name, dtype, dimension_names, + fill_value=fill_value, + **kwargs) + set_packing_ncattrs(cf_var) + + # If packing attributes are specified, don't bother checking whether + # the fill value is in the data. + if packing: + fill_value_to_check = None + elif fill_value is not None: + fill_value_to_check = fill_value + else: + fill_value_to_check = netCDF4.default_fillvals[dtype.str[1:]] + + # Store the data and check if it is masked and contains the fill value + is_masked, contains_fill_value = store(data, cf_var, + fill_value_to_check) + + if dtype.itemsize == 1 and fill_value is None: + if is_masked: + warnings.warn("Cube '{}' contains masked byte data and will " + "be interpreted as unmasked. To save as masked " + "data please explicitly provide a fill value." + .format(cube.name())) + elif contains_fill_value: + warnings.warn("Cube '{}' contains data points equal to the fill " + "value {}. The points will be interpreted as being " + "masked. 
Please provide a fill_value argument not " + "equal to any data point.".format(cube.name(), + fill_value)) if cube.standard_name: _setncattr(cf_var, 'standard_name', cube.standard_name) @@ -2040,7 +2085,7 @@ def _increment_name(self, varname): def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, unlimited_dimensions=None, zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', - least_significant_digit=None, packing=None): + least_significant_digit=None, packing=None, fill_value=None): """ Save cube(s) to a netCDF file, given the cube and the filename. @@ -2144,16 +2189,24 @@ def save(cube, filename, netcdf_format='NETCDF4', local_keys=None, This provides support for netCDF data packing as described in http://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Packed-Data-Values If this argument is a type (or type string), appropriate values of - scale_factor and add_offset will be automatically calculated based on - `cube.data` and possible masking. For masked data, `fill_value` is - taken from netCDF4.default_fillvals. For more control, pass a dict with - one or more of the following keys: `dtype` (required), `scale_factor`, - `add_offset`, and `fill_value`. To save multiple cubes with different - packing parameters, pass an iterable of types, strings, dicts, or - `None`, one for each cube. Note that automatic calculation of packing - parameters will trigger loading of lazy data; set them manually using a - dict to avoid this. The default is `None`, in which case the datatype - is determined from the cube and no packing will occur. + scale_factor and add_offset will be automatically calculated based + on `cube.data` and possible masking. For more control, pass a dict with + one or more of the following keys: `dtype` (required), `scale_factor` + and `add_offset`. 
Note that automatic calculation of packing parameters + will trigger loading of lazy data; set them manually using a dict to + avoid this. The default is `None`, in which case the datatype is + determined from the cube and no packing will occur. If this argument is + a list it must have the same number of elements as `cube` if `cube` is + a `:class:`iris.cube.CubeList`, or one element, and each element of + this argument will be applied to each cube separately. + + * fill_value (numeric or list): + The value to use for the `_FillValue` attribute on the netCDF variable. + If `packing` is specified the value of `fill_value` should be in the + domain of the packed data. If this argument is a list it must have the + same number of elements as `cube` if `cube` is a + `:class:`iris.cube.CubeList`, or a single element, and each element of + this argument will be applied to each cube separately. Returns: None. @@ -2244,13 +2297,29 @@ def is_valid_packspec(p): raise ValueError(msg) packspecs = packing + # Make fill-value(s) into an iterable over cubes. + if isinstance(fill_value, six.string_types): + # Strings are awkward -- handle separately. + fill_values = repeat(fill_value) + else: + try: + fill_values = tuple(fill_value) + except TypeError: + fill_values = repeat(fill_value) + else: + if len(fill_values) != len(cubes): + msg = ('If fill_value is a list, it must have the ' + 'same number of elements as the cube argument.') + raise ValueError(msg) + # Initialise Manager for saving with Saver(filename, netcdf_format) as sman: # Iterate through the cubelist. 
- for cube, packspec in zip(cubes, packspecs): + for cube, packspec, fill_value in zip(cubes, packspecs, fill_values): sman.write(cube, local_keys, unlimited_dimensions, zlib, complevel, shuffle, fletcher32, contiguous, chunksizes, endian, - least_significant_digit, packing=packspec) + least_significant_digit, packing=packspec, + fill_value=fill_value) if iris.config.netcdf.conventions_override: # Set to the default if custom conventions are not available. diff --git a/lib/iris/tests/integration/test_netcdf.py b/lib/iris/tests/integration/test_netcdf.py index 8c3e3745b3..c3f25b3127 100644 --- a/lib/iris/tests/integration/test_netcdf.py +++ b/lib/iris/tests/integration/test_netcdf.py @@ -210,20 +210,6 @@ def test_lazy_preserved_save(self): saver.write(acube) self.assertTrue(acube.has_lazy_data()) - @tests.skip_dask_mask - def test_lazy_mask_preserve_fill_value(self): - data = ma.array([0, 1], mask=[False, True]) - cube = iris.cube.Cube(data, fill_value=-1) - with self.temp_filename(suffix='.nc') as filename, \ - self.temp_filename(suffix='.nc') as other_filename: - iris.save(cube, filename, unlimited_dimensions=[]) - ncube = iris.load_cube(filename) - # Lazy save of the masked cube - iris.save(ncube, other_filename, unlimited_dimensions=[]) - ds = nc.Dataset(other_filename, 'r') - avar = ds['unknown'] - self.assertEqual(avar._FillValue, -1) - @tests.skip_data class TestCellMeasures(tests.IrisTest): @@ -427,7 +413,6 @@ def test_multi_packed_single_dtype(self): # Read PP input file. self._multi_test('multi_packed_single_dtype.cdl') - @tests.skip_dask_mask def test_multi_packed_multi_dtype(self): """Test saving multiple packed cubes with pack_dtype list.""" # Read PP input file. 
diff --git a/lib/iris/tests/results/integration/netcdf/TestPackedData/multi_packed_multi_dtype.cdl b/lib/iris/tests/results/integration/netcdf/TestPackedData/multi_packed_multi_dtype.cdl index ffc6763a04..34fcf2825b 100644 --- a/lib/iris/tests/results/integration/netcdf/TestPackedData/multi_packed_multi_dtype.cdl +++ b/lib/iris/tests/results/integration/netcdf/TestPackedData/multi_packed_multi_dtype.cdl @@ -46,7 +46,6 @@ variables: height:standard_name = "height" ; height:positive = "up" ; float precipitation_flux(time, latitude, longitude) ; - precipitation_flux:_FillValue = -1.e+30f ; precipitation_flux:standard_name = "precipitation_flux" ; precipitation_flux:units = "kg m-2 s-1" ; precipitation_flux:um_stash_source = "m01s05i216" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_monotonic.cml b/lib/iris/tests/results/netcdf/netcdf_monotonic.cml index a59a629e9f..fd8dffdab3 100644 --- a/lib/iris/tests/results/netcdf/netcdf_monotonic.cml +++ b/lib/iris/tests/results/netcdf/netcdf_monotonic.cml @@ -1,6 +1,6 @@ - + @@ -18,7 +18,7 @@ - + @@ -36,7 +36,7 @@ - + diff --git a/lib/iris/tests/results/netcdf/netcdf_save_hybrid_height.cdl b/lib/iris/tests/results/netcdf/netcdf_save_hybrid_height.cdl index 5a57a8e6ea..f3e56065f1 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_hybrid_height.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_hybrid_height.cdl @@ -6,7 +6,6 @@ dimensions: model_level_number = 10 ; variables: float air_potential_temperature(time, model_level_number, grid_latitude, grid_longitude) ; - air_potential_temperature:_FillValue = -1.073742e+09f ; air_potential_temperature:standard_name = "air_potential_temperature" ; air_potential_temperature:units = "K" ; air_potential_temperature:um_stash_source = "m01s00i004" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_multi_0.cdl b/lib/iris/tests/results/netcdf/netcdf_save_multi_0.cdl index 8e8a021d9b..5fcbcc29cb 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_multi_0.cdl +++ 
b/lib/iris/tests/results/netcdf/netcdf_save_multi_0.cdl @@ -5,7 +5,6 @@ dimensions: longitude = 96 ; variables: float air_temperature(time, latitude, longitude) ; - air_temperature:_FillValue = -1.e+30f ; air_temperature:standard_name = "air_temperature" ; air_temperature:units = "K" ; air_temperature:um_stash_source = "m01s03i236" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_multi_1.cdl b/lib/iris/tests/results/netcdf/netcdf_save_multi_1.cdl index b26b390659..eb1343fddb 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_multi_1.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_multi_1.cdl @@ -5,7 +5,6 @@ dimensions: longitude = 96 ; variables: float air_temperature(time, latitude, longitude) ; - air_temperature:_FillValue = -1.e+30f ; air_temperature:standard_name = "air_temperature" ; air_temperature:units = "K" ; air_temperature:um_stash_source = "m01s03i236" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_multi_2.cdl b/lib/iris/tests/results/netcdf/netcdf_save_multi_2.cdl index dc240568ed..40a700b5d3 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_multi_2.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_multi_2.cdl @@ -5,7 +5,6 @@ dimensions: longitude = 96 ; variables: float precipitation_flux(time, latitude, longitude) ; - precipitation_flux:_FillValue = -1.e+30f ; precipitation_flux:standard_name = "precipitation_flux" ; precipitation_flux:units = "kg m-2 s-1" ; precipitation_flux:um_stash_source = "m01s05i216" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_multiple.cdl b/lib/iris/tests/results/netcdf/netcdf_save_multiple.cdl index 5d83b77c6d..10b0a62184 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_multiple.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_multiple.cdl @@ -5,7 +5,6 @@ dimensions: longitude = 96 ; variables: float air_temperature(time, latitude, longitude) ; - air_temperature:_FillValue = -1.e+30f ; air_temperature:standard_name = "air_temperature" ; air_temperature:units = "K" ; 
air_temperature:um_stash_source = "m01s03i236" ; @@ -45,7 +44,6 @@ variables: height:standard_name = "height" ; height:positive = "up" ; float air_temperature_0(time, latitude, longitude) ; - air_temperature_0:_FillValue = -1.e+30f ; air_temperature_0:standard_name = "air_temperature" ; air_temperature_0:units = "K" ; air_temperature_0:um_stash_source = "m01s03i236" ; @@ -53,7 +51,6 @@ variables: air_temperature_0:grid_mapping = "latitude_longitude" ; air_temperature_0:coordinates = "forecast_period forecast_reference_time height" ; float precipitation_flux(time, latitude, longitude) ; - precipitation_flux:_FillValue = -1.e+30f ; precipitation_flux:standard_name = "precipitation_flux" ; precipitation_flux:units = "kg m-2 s-1" ; precipitation_flux:um_stash_source = "m01s05i216" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_ndim_auxiliary.cdl b/lib/iris/tests/results/netcdf/netcdf_save_ndim_auxiliary.cdl index ee172ace82..790d2c37b7 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_ndim_auxiliary.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_ndim_auxiliary.cdl @@ -5,7 +5,6 @@ dimensions: rlon = 174 ; variables: float pr(time, rlat, rlon) ; - pr:_FillValue = 1.e+30f ; pr:standard_name = "precipitation_flux" ; pr:long_name = "Precipitation" ; pr:units = "kg m-2 s-1" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_save_single.cdl b/lib/iris/tests/results/netcdf/netcdf_save_single.cdl index 6bee1a5589..f6ef6fc338 100644 --- a/lib/iris/tests/results/netcdf/netcdf_save_single.cdl +++ b/lib/iris/tests/results/netcdf/netcdf_save_single.cdl @@ -4,7 +4,6 @@ dimensions: longitude = 96 ; variables: float air_temperature(latitude, longitude) ; - air_temperature:_FillValue = -1.e+30f ; air_temperature:standard_name = "air_temperature" ; air_temperature:units = "K" ; air_temperature:um_stash_source = "m01s03i236" ; diff --git a/lib/iris/tests/results/netcdf/netcdf_units_1.cml b/lib/iris/tests/results/netcdf/netcdf_units_1.cml index 567ce1fa3a..7f4e700139 
100644 --- a/lib/iris/tests/results/netcdf/netcdf_units_1.cml +++ b/lib/iris/tests/results/netcdf/netcdf_units_1.cml @@ -1,6 +1,6 @@ - + diff --git a/lib/iris/tests/results/netcdf/uint32_data_netcdf3.cml b/lib/iris/tests/results/netcdf/uint32_data_netcdf3.cml index 05ac5ec7a8..93da2db83d 100644 --- a/lib/iris/tests/results/netcdf/uint32_data_netcdf3.cml +++ b/lib/iris/tests/results/netcdf/uint32_data_netcdf3.cml @@ -1,6 +1,6 @@ - + diff --git a/lib/iris/tests/test_netcdf.py b/lib/iris/tests/test_netcdf.py index 347b114f09..6823ef9cf9 100644 --- a/lib/iris/tests/test_netcdf.py +++ b/lib/iris/tests/test_netcdf.py @@ -51,7 +51,6 @@ @tests.skip_data class TestNetCDFLoad(tests.IrisTest): - @tests.skip_dask_mask def test_monotonic(self): cubes = iris.load(tests.get_data_path( ('NetCDF', 'testing', 'test_monotonic_coordinate.nc'))) @@ -84,7 +83,6 @@ def test_missing_time_bounds(self): dataset.close() cube = iris.load_cube(filename, 'eastward_wind') - @tests.skip_dask_mask def test_load_global_xyzt_gems(self): # Test loading single xyzt CF-netCDF file (multi-cube). cubes = iris.load(tests.get_data_path(('NetCDF', 'global', 'xyz_t', @@ -96,7 +94,7 @@ def test_load_global_xyzt_gems(self): # manager loading. lnsp = cubes[1] self.assertTrue(ma.isMaskedArray(lnsp.data)) - self.assertEqual(-32767.0, lnsp.fill_value) + self.assertEqual(-32767.0, lnsp.data.fill_value) def test_load_global_xyzt_gems_iter(self): # Test loading stepped single xyzt CF-netCDF file (multi-cube). @@ -245,7 +243,6 @@ def test_deferred_loading(self): self.assertCML(cube[0][(0, 2), (1, 3)], ('netcdf', 'netcdf_deferred_mix_1.cml')) - @tests.skip_dask_mask def test_units(self): # Test exercising graceful cube and coordinate units loading. 
cube0, cube1 = sorted(iris.load(tests.get_data_path(('NetCDF', @@ -426,7 +423,6 @@ def test_netcdf_save_format(self): with self.assertRaises(ValueError): iris.save(cube, file_out, netcdf_format='WIBBLE') - @tests.skip_dask_mask @tests.skip_data def test_netcdf_save_single(self): # Test saving a single CF-netCDF file. @@ -445,7 +441,6 @@ def test_netcdf_save_single(self): # TODO investigate why merge now make time an AuxCoord rather than a # DimCoord and why forecast_period is 'preferred'. - @tests.skip_dask_mask @tests.skip_data def test_netcdf_save_multi2multi(self): # Test saving multiple CF-netCDF files. @@ -464,7 +459,6 @@ def test_netcdf_save_multi2multi(self): self.assertCDL(file_out, ('netcdf', 'netcdf_save_multi_%d.cdl' % index)) - @tests.skip_dask_mask @tests.skip_data def test_netcdf_save_multi2single(self): # Test saving multiple cubes to a single CF-netCDF file. @@ -565,7 +559,6 @@ def test_netcdf_multi_conflict_name_dup_coord(self): self.assertCDL( file_out, ('netcdf', 'multi_dim_coord_slightly_different.cdl')) - @tests.skip_dask_mask @tests.skip_data def test_netcdf_hybrid_height(self): # Test saving a CF-netCDF file which contains a hybrid height @@ -590,7 +583,6 @@ def test_netcdf_hybrid_height(self): self.assertCML(cube, ('netcdf', 'netcdf_save_load_hybrid_height.cml')) - @tests.skip_dask_mask @tests.skip_data def test_netcdf_save_ndim_auxiliary(self): # Test saving CF-netCDF with multi-dimensional auxiliary coordinates. 
@@ -864,7 +856,6 @@ def test_uint32_auxiliary_coord_netcdf3(self): 'uint32_auxiliary_coord_netcdf3.cml'), checksum=False) - @tests.skip_dask_mask def test_uint32_data_netcdf3(self): self.cube.data = self.cube.data.astype(np.uint32) with self.temp_filename(suffix='.nc') as filename: diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py index 46621c892b..3ab339b21e 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py @@ -24,10 +24,15 @@ # importing anything else. import iris.tests as tests +from contextlib import contextmanager +import warnings + import netCDF4 as nc import numpy as np +from numpy import ma import iris +from iris._lazy_data import as_lazy_data from iris.coord_systems import (GeogCS, TransverseMercator, RotatedGeogCS, LambertConformal, Mercator, Stereographic, LambertAzimuthalEqualArea) @@ -325,6 +330,164 @@ def test_valid_max_saved(self): ds.close() +class Test_write_fill_value(tests.IrisTest): + def _make_cube(self, dtype, lazy=False, masked_value=None, + masked_index=None): + data = np.arange(12, dtype=dtype).reshape(3, 4) + if masked_value is not None: + data = ma.masked_equal(data, masked_value) + if masked_index is not None: + data = np.ma.masked_array(data) + data[masked_index] = ma.masked + if lazy: + data = as_lazy_data(data) + lat = DimCoord(np.arange(3), 'latitude', units='degrees') + lon = DimCoord(np.arange(4), 'longitude', units='degrees') + return Cube(data, standard_name='air_temperature', units='K', + dim_coords_and_dims=[(lat, 0), (lon, 1)]) + + @contextmanager + def _netCDF_var(self, cube, **kwargs): + # Get the netCDF4 Variable for a cube from a temp file + standard_name = cube.standard_name + with self.temp_filename('.nc') as nc_path: + with Saver(nc_path, 'NETCDF4') as saver: + saver.write(cube, **kwargs) + ds = nc.Dataset(nc_path) + var, = [var for var in ds.variables.values() + if 
var.standard_name == standard_name] + yield var + + @contextmanager + def _warning_check(self, message_text='', expect_warning=True): + # Check that a warning is raised containing a given string, or that + # no warning containing a given string is raised. + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + yield + matches = (message_text in str(warning.message) for warning in w) + warning_raised = any(matches) + msg = "Warning containing text '{}' not raised." if expect_warning \ + else "Warning containing text '{}' unexpectedly raised." + self.assertEqual(expect_warning, warning_raised, + msg.format(message_text)) + + def test_fill_value(self): + # Test that a passed fill value is saved as a _FillValue attribute. + cube = self._make_cube('>f4') + fill_value = 12345. + with self._netCDF_var(cube, fill_value=fill_value) as var: + self.assertEqual(fill_value, var._FillValue) + + def test_default_fill_value(self): + # Test that if no fill value is passed then there is no _FillValue. + # attribute. + cube = self._make_cube('>f4') + with self._netCDF_var(cube) as var: + self.assertNotIn('_FillValue', var.ncattrs()) + + def test_mask_fill_value(self): + # Test that masked data saves correctly when given a fill value. + index = (1, 1) + fill_value = 12345. + cube = self._make_cube('>f4', masked_index=index) + with self._netCDF_var(cube, fill_value=fill_value) as var: + self.assertEqual(fill_value, var._FillValue) + self.assertTrue(var[index].mask) + + def test_mask_default_fill_value(self): + # Test that masked data saves correctly using the default fill value. + index = (1, 1) + cube = self._make_cube('>f4', masked_index=index) + with self._netCDF_var(cube) as var: + self.assertNotIn('_FillValue', var.ncattrs()) + self.assertTrue(var[index].mask) + + def test_mask_lazy_fill_value(self): + # Test that masked lazy data saves correctly when given a fill value. + index = (1, 1) + fill_value = 12345. 
+ cube = self._make_cube('>f4', masked_index=index, lazy=True) + with self._netCDF_var(cube, fill_value=fill_value) as var: + self.assertEqual(var._FillValue, fill_value) + self.assertTrue(var[index].mask) + + def test_mask_lazy_default_fill_value(self): + # Test that masked lazy data saves correctly using the default fill + # value. + index = (1, 1) + cube = self._make_cube('>f4', masked_index=index, lazy=True) + with self._netCDF_var(cube) as var: + self.assertNotIn('_FillValue', var.ncattrs()) + self.assertTrue(var[index].mask) + + def test_contains_fill_value_passed(self): + # Test that a warning is raised if the data contains the fill value. + cube = self._make_cube('>f4') + fill_value = 1 + with self._warning_check( + 'contains data points equal to the fill value'): + with self._netCDF_var(cube, fill_value=fill_value): + pass + + def test_contains_fill_value_byte(self): + # Test that a warning is raised if the data contains the fill value + # when it is of a byte type. + cube = self._make_cube('>i1') + fill_value = 1 + with self._warning_check( + 'contains data points equal to the fill value'): + with self._netCDF_var(cube, fill_value=fill_value): + pass + + def test_contains_default_fill_value(self): + # Test that a warning is raised if the data contains the default fill + # value if no fill_value argument is supplied. + cube = self._make_cube('>f4') + cube.data[0, 0] = nc.default_fillvals['f4'] + with self._warning_check( + 'contains data points equal to the fill value'): + with self._netCDF_var(cube): + pass + + def test_contains_default_fill_value_byte(self): + # Test that no warning is raised if the data contains the default fill + # value if no fill_value argument is supplied when the data is of a + # byte type. 
+ cube = self._make_cube('>i1') + with self._warning_check( + 'contains data points equal to the fill value', False): + with self._netCDF_var(cube): + pass + + def test_contains_masked_fill_value(self): + # Test that no warning is raised if the data contains the fill_value at + # a masked point. + fill_value = 1 + cube = self._make_cube('>f4', masked_value=fill_value) + with self._warning_check( + 'contains data points equal to the fill value', False): + with self._netCDF_var(cube, fill_value=fill_value): + pass + + def test_masked_byte_default_fill_value(self): + # Test that a warning is raised when saving masked byte data with no + # fill value supplied. + cube = self._make_cube('>i1', masked_value=1) + with self._warning_check('contains masked byte data', True): + with self._netCDF_var(cube): + pass + + def test_masked_byte_fill_value_passed(self): + # Test that no warning is raised when saving masked byte data with a + # fill value supplied if the data does not contain the fill_value. + fill_value = 100 + cube = self._make_cube('>i1', masked_value=2) + with self._warning_check('contains masked byte data', False): + with self._netCDF_var(cube, fill_value=fill_value): + pass + + class _Common__check_attribute_compliance(object): def setUp(self): self.container = mock.Mock(name='container', attributes={}) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py b/lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py new file mode 100644 index 0000000000..3d2d74e869 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/test__FillValueMaskCheckAndStoreTarget.py @@ -0,0 +1,104 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. 
+# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +""" +Unit tests for the `iris.fileformats.netcdf._FillValueMaskCheckAndStoreTarget` +class. + +""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import mock +import numpy as np + +from iris.fileformats.netcdf import _FillValueMaskCheckAndStoreTarget + + +class Test__FillValueMaskCheckAndStoreTarget(tests.IrisTest): + def _call_target(self, fill_value, keys, vals): + inner_target = mock.MagicMock() + target = _FillValueMaskCheckAndStoreTarget(inner_target, + fill_value=fill_value) + + for key, val in zip(keys, vals): + target[key] = val + + calls = [mock.call(key, val) for key, val in zip(keys, vals)] + inner_target.__setitem__.assert_has_calls(calls) + + return target + + def test___setitem__(self): + self._call_target(None, [1], [2]) + + def test_no_fill_value_not_masked(self): + # Test when the fill value is not present and the data is not masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.arange(5)] + fill_value = 16 + target = self._call_target(fill_value, keys, vals) + self.assertFalse(target.contains_value) + self.assertFalse(target.is_masked) + + def test_contains_fill_value_not_masked(self): + # Test when the fill 
value is present and the data is not masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.arange(5)] + fill_value = 5 + target = self._call_target(fill_value, keys, vals) + self.assertTrue(target.contains_value) + self.assertFalse(target.is_masked) + + def test_no_fill_value_masked(self): + # Test when the fill value is not present and the data is masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.ma.masked_equal(np.arange(5), 3)] + fill_value = 16 + target = self._call_target(fill_value, keys, vals) + self.assertFalse(target.contains_value) + self.assertTrue(target.is_masked) + + def test_contains_fill_value_masked(self): + # Test when the fill value is present and the data is masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.ma.masked_equal(np.arange(5), 3)] + fill_value = 5 + target = self._call_target(fill_value, keys, vals) + self.assertTrue(target.contains_value) + self.assertTrue(target.is_masked) + + def test_fill_value_None(self): + # Test when the fill value is None + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.arange(5)] + fill_value = None + target = self._call_target(fill_value, keys, vals) + self.assertFalse(target.contains_value) + + def test_contains_masked_fill_value(self): + # Test when the fill value is present but masked, and the data is masked + keys = [slice(0, 10), slice(10, 15)] + vals = [np.arange(10), np.ma.masked_equal(np.arange(10, 15), 13)] + fill_value = 13 + target = self._call_target(fill_value, keys, vals) + self.assertFalse(target.contains_value) + self.assertTrue(target.is_masked) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_save.py b/lib/iris/tests/unit/fileformats/netcdf/test_save.py index a6e991b14e..6aa8dcffc0 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_save.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_save.py @@ -28,7 +28,8 @@ import numpy as np import iris -from iris.cube import Cube +from iris.coords import 
DimCoord +from iris.cube import Cube, CubeList from iris.fileformats.netcdf import save, CF_CONVENTIONS_VERSION from iris.tests.stock import lat_lon_cube @@ -125,5 +126,96 @@ def test_no_unlimited_future_default(self): self.assertFalse(ds.dimensions['latitude'].isunlimited()) +class Test_fill_value(tests.IrisTest): + def setUp(self): + self.standard_names = ['air_temperature', + 'air_potential_temperature', + 'air_temperature_anomaly'] + + def _make_cubes(self): + lat = DimCoord(np.arange(3), 'latitude', units='degrees') + lon = DimCoord(np.arange(4), 'longitude', units='degrees') + data = np.arange(12, dtype='f4').reshape(3, 4) + return CubeList(Cube(data, standard_name=name, units='K', + dim_coords_and_dims=[(lat, 0), (lon, 1)]) + for name in self.standard_names) + + def test_None(self): + # Test that when no fill_value argument is passed, the fill_value + # argument to Saver.write is None or not present. + cubes = self._make_cubes() + with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + save(cubes, 'dummy.nc') + + # Get the Saver.write mock + with Saver() as saver: + write = saver.write + + self.assertEqual(3, write.call_count) + for call in write.mock_calls: + _, _, kwargs = call + if 'fill_value' in kwargs: + self.assertIs(None, kwargs['fill_value']) + + def test_single(self): + # Test that when a single value is passed as the fill_value argument, + # that value is passed to each call to Saver.write + cubes = self._make_cubes() + fill_value = 12345. 
+ with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + save(cubes, 'dummy.nc', fill_value=fill_value) + + # Get the Saver.write mock + with Saver() as saver: + write = saver.write + + self.assertEqual(3, write.call_count) + for call in write.mock_calls: + _, _, kwargs = call + self.assertEqual(fill_value, kwargs['fill_value']) + + def test_multiple(self): + # Test that when a list is passed as the fill_value argument, + # each element is passed to separate calls to Saver.write + cubes = self._make_cubes() + fill_values = [123., 456., 789.] + with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + save(cubes, 'dummy.nc', fill_value=fill_values) + + # Get the Saver.write mock + with Saver() as saver: + write = saver.write + + self.assertEqual(3, write.call_count) + for call, fill_value in zip(write.mock_calls, fill_values): + _, _, kwargs = call + self.assertEqual(fill_value, kwargs['fill_value']) + + def test_single_string(self): + # Test that when a string is passed as the fill_value argument, + # that value is passed to calls to Saver.write + cube = Cube(['abc', 'def', 'hij']) + fill_value = 'xyz' + with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + save(cube, 'dummy.nc', fill_value=fill_value) + + # Get the Saver.write mock + with Saver() as saver: + write = saver.write + + self.assertEqual(1, write.call_count) + _, _, kwargs = write.mock_calls[0] + self.assertEqual(fill_value, kwargs['fill_value']) + + def test_multi_wrong_length(self): + # Test that when a list of a different length to the number of cubes + # is passed as the fill_value argument, an error is raised + cubes = self._make_cubes() + fill_values = [1., 2., 3., 4.] 
+ with mock.patch('iris.fileformats.netcdf.Saver') as Saver: + with self.assertRaises(ValueError): + save(cubes, 'dummy.nc', fill_value=fill_values) + + if __name__ == "__main__": tests.main() diff --git a/lib/iris/tests/unit/lazy_data/test_get_fill_value.py b/lib/iris/tests/unit/lazy_data/test_get_fill_value.py deleted file mode 100644 index e83739f331..0000000000 --- a/lib/iris/tests/unit/lazy_data/test_get_fill_value.py +++ /dev/null @@ -1,75 +0,0 @@ -# (C) British Crown Copyright 2017, Met Office -# -# This file is part of Iris. -# -# Iris is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Iris is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with Iris. If not, see . -"""Test the function :func:`iris._lazy data.get_fill_value`.""" - -from __future__ import (absolute_import, division, print_function) -from six.moves import (filter, input, map, range, zip) # noqa - -# Import iris.tests first so that some things can be initialised before -# importing anything else. -import iris.tests as tests - -import dask.array as da -import numpy as np -import numpy.ma as ma - -from iris._lazy_data import as_lazy_data, get_fill_value - - -class Test_get_fill_value(tests.IrisTest): - def setUp(self): - # Array shape and fill-value. 
- spec = [((2, 3, 4, 5), -1), # 4d array - ((2, 3, 4), -2), # 3d array - ((2, 3), -3), # 2d array - ((2,), -4), # 1d array - ((), -5)] # 0d array - self.arrays = [np.empty(shape) for (shape, _) in spec] - self.masked = [ma.empty(shape, fill_value=fv) for (shape, fv) in spec] - self.lazy_arrays = [as_lazy_data(array) for array in self.arrays] - self.lazy_masked = [as_lazy_data(array) for array in self.masked] - # Add the masked constant case. - mc = ma.array([0], mask=True)[0] - self.masked.append(mc) - self.lazy_masked.append(as_lazy_data(mc)) - # Collect the expected fill-values. - self.expected_fill_values = [fv for (_, fv) in spec] - mc_fill_value = ma.masked_array(0, dtype=mc.dtype).fill_value - self.expected_fill_values.append(mc_fill_value) - - def test_arrays(self): - for array in self.arrays: - self.assertIsNone(get_fill_value(array)) - - def test_masked(self): - for array, expected in zip(self.masked, self.expected_fill_values): - result = get_fill_value(array) - self.assertEqual(result, expected) - - def test_lazy_arrays(self): - for array in self.lazy_arrays: - self.assertIsNone(get_fill_value(array)) - - def test_lazy_masked(self): - for array, expected in zip(self.lazy_masked, - self.expected_fill_values): - result = get_fill_value(array) - self.assertEqual(result, expected) - - -if __name__ == '__main__': - tests.main()