diff --git a/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb b/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb index 5ecfeb77b17..815d71a5f40 100644 --- a/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb +++ b/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb @@ -498,6 +498,22 @@ fc_build_cell_measure python engine.rule_triggered.add(rule.name) +# +# Context: +# This rule will trigger for each ancillary_variable case specific fact. +# +# Purpose: +# Add the ancillary variable to the cube. +# +fc_build_ancil_var + foreach + facts_cf.ancillary_variable($var) + assert + python ancil_var = engine.cf_var.cf_group.ancillary_variables[$var] + python build_ancil_var(engine, ancil_var) + python engine.rule_triggered.add(rule.name) + + # # Context: # This rule will trigger iff a CF latitude coordinate exists and @@ -1941,25 +1957,26 @@ fc_extras # Add it to the cube cube.add_aux_coord(coord, data_dims) - # Update the coordinate to CF-netCDF variable mapping. + # Make a list with names, stored on the engine, so we can find them all later. engine.provides['coordinates'].append((coord, cf_coord_var.cf_name)) ################################################################################ - def build_cell_measures(engine, cf_cm_attr, coord_name=None): + def build_cell_measures(engine, cf_cm_var): """Create a CellMeasure instance and add it to the cube.""" cf_var = engine.cf_var cube = engine.cube attributes = {} # Get units - attr_units = get_attr_units(cf_cm_attr, attributes) + attr_units = get_attr_units(cf_cm_var, attributes) - data = _get_cf_var_data(cf_cm_attr, engine.filename) + # Get (lazy) content array + data = _get_cf_var_data(cf_cm_var, engine.filename) # Determine the name of the dimension/s shared between the CF-netCDF data variable # and the coordinate being built. - common_dims = [dim for dim in cf_cm_attr.dimensions + common_dims = [dim for dim in cf_cm_var.dimensions if dim in cf_var.dimensions] data_dims = None if common_dims: @@ -1967,10 +1984,10 @@ fc_extras data_dims = [cf_var.dimensions.index(dim) for dim in common_dims] # Determine the standard_name, long_name and var_name - standard_name, long_name, var_name = get_names(cf_cm_attr, coord_name, attributes) + standard_name, long_name, var_name = get_names(cf_cm_var, None, attributes) # Obtain the cf_measure. - measure = cf_cm_attr.cf_measure + measure = cf_cm_var.cf_measure # Create the CellMeasure cell_measure = iris.coords.CellMeasure(data, @@ -1984,6 +2001,51 @@ fc_extras # Add it to the cube cube.add_cell_measure(cell_measure, data_dims) + # Make a list with names, stored on the engine, so we can find them all later. + engine.provides['cell_measures'].append((cell_measure, cf_cm_var.cf_name)) + + + + ################################################################################ + def build_ancil_var(engine, cf_av_var): + """Create an AncillaryVariable instance and add it to the cube.""" + cf_var = engine.cf_var + cube = engine.cube + attributes = {} + + # Get units + attr_units = get_attr_units(cf_av_var, attributes) + + # Get (lazy) content array + data = _get_cf_var_data(cf_av_var, engine.filename) + + # Determine the name of the dimension/s shared between the CF-netCDF data variable + # and the AV being built. + common_dims = [dim for dim in cf_av_var.dimensions + if dim in cf_var.dimensions] + data_dims = None + if common_dims: + # Calculate the offset of each common dimension. 
+ data_dims = [cf_var.dimensions.index(dim) for dim in common_dims] + + # Determine the standard_name, long_name and var_name + standard_name, long_name, var_name = get_names(cf_av_var, None, attributes) + + # Create the AncillaryVariable + av = iris.coords.AncillaryVariable( + data, + standard_name=standard_name, + long_name=long_name, + var_name=var_name, + units=attr_units, + attributes=attributes) + + # Add it to the cube + cube.add_ancillary_variable(av, data_dims) + + # Make a list with names, stored on the engine, so we can find them all later. + engine.provides['ancillary_variables'].append((av, cf_av_var.cf_name)) + ################################################################################ diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 4d7ddedc61c..08b079c3edb 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -459,7 +459,10 @@ def __setstate__(self, state): def _assert_case_specific_facts(engine, cf, cf_group): # Initialise pyke engine "provides" hooks. + # These are used to patch non-processed element attributes after rules activation. engine.provides["coordinates"] = [] + engine.provides["cell_measures"] = [] + engine.provides["ancillary_variables"] = [] # Assert facts for CF coordinates. for cf_name in cf_group.coordinates.keys(): @@ -479,6 +482,12 @@ def _assert_case_specific_facts(engine, cf, cf_group): _PYKE_FACT_BASE, "cell_measure", (cf_name,) ) + # Assert facts for CF ancillary variables. + for cf_name in cf_group.ancillary_variables.keys(): + engine.add_case_specific_fact( + _PYKE_FACT_BASE, "ancillary_variable", (cf_name,) + ) + # Assert facts for CF grid_mappings. for cf_name in cf_group.grid_mappings.keys(): engine.add_case_specific_fact( @@ -597,31 +606,38 @@ def _load_cube(engine, cf, cf_var, filename): # Run pyke inference engine with forward chaining rules. engine.activate(_PYKE_RULE_BASE) - # Populate coordinate attributes with the untouched attributes from the - # associated CF-netCDF variable. - coordinates = engine.provides.get("coordinates", []) - + # Having run the rules, now populate the attributes of all the cf elements with the + # "unused" attributes from the associated CF-netCDF variable. + # That is, all those that aren't CF reserved terms. def attribute_predicate(item): return item[0] not in _CF_ATTRS - for coord, cf_var_name in coordinates: - tmpvar = filter( - attribute_predicate, cf.cf_group[cf_var_name].cf_attrs_unused() - ) + def add_unused_attributes(iris_object, cf_var): + tmpvar = filter(attribute_predicate, cf_var.cf_attrs_unused()) for attr_name, attr_value in tmpvar: - _set_attributes(coord.attributes, attr_name, attr_value) + _set_attributes(iris_object.attributes, attr_name, attr_value) + + def fix_attributes_all_elements(role_name): + elements_and_names = engine.provides.get(role_name, []) + + for iris_object, cf_var_name in elements_and_names: + add_unused_attributes(iris_object, cf.cf_group[cf_var_name]) + + # Populate the attributes of all coordinates, cell-measures and ancillary-vars. + fix_attributes_all_elements("coordinates") + fix_attributes_all_elements("ancillary_variables") + fix_attributes_all_elements("cell_measures") - tmpvar = filter(attribute_predicate, cf_var.cf_attrs_unused()) - # Attach untouched attributes of the associated CF-netCDF data variable to - # the cube. - for attr_name, attr_value in tmpvar: - _set_attributes(cube.attributes, attr_name, attr_value) + # Also populate attributes of the top-level cube itself. 
+ add_unused_attributes(cube, cf_var) + # Work out reference names for all the coords. names = { coord.var_name: coord.standard_name or coord.var_name or "unknown" for coord in cube.coords() } + # Add all the cube cell methods. cube.cell_methods = [ iris.coords.CellMethod( method=method.method, diff --git a/lib/iris/tests/test_netcdf.py b/lib/iris/tests/test_netcdf.py index a550e1ed4b7..91e37dd3a83 100644 --- a/lib/iris/tests/test_netcdf.py +++ b/lib/iris/tests/test_netcdf.py @@ -16,6 +16,7 @@ import os.path import shutil import stat +from subprocess import check_call import tempfile from unittest import mock @@ -27,8 +28,10 @@ import iris.analysis.trajectory import iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc as pyke_rules import iris.fileformats.netcdf +from iris.fileformats.netcdf import load_cubes as nc_load_cubes import iris.std_names import iris.util +from iris.coords import AncillaryVariable, CellMeasure import iris.coord_systems as icoord_systems import iris.tests.stock as stock from iris._lazy_data import is_lazy_data @@ -36,6 +39,13 @@ @tests.skip_data class TestNetCDFLoad(tests.IrisTest): + def setUp(self): + self.tmpdir = None + + def tearDown(self): + if self.tmpdir is not None: + shutil.rmtree(self.tmpdir) + def test_monotonic(self): cubes = iris.load( tests.get_data_path( @@ -240,6 +250,104 @@ def test_cell_methods(self): self.assertCML(cubes, ("netcdf", "netcdf_cell_methods.cml")) + def test_ancillary_variables(self): + # Note: using a CDL string as a test data reference, rather than a binary file. + ref_cdl = """ + netcdf cm_attr { + dimensions: + axv = 3 ; + variables: + int64 qqv(axv) ; + qqv:long_name = "qq" ; + qqv:units = "1" ; + qqv:ancillary_variables = "my_av" ; + int64 axv(axv) ; + axv:units = "1" ; + axv:long_name = "x" ; + double my_av(axv) ; + my_av:units = "1" ; + my_av:long_name = "refs" ; + my_av:custom = "extra-attribute"; + data: + axv = 1, 2, 3; + my_av = 11., 12., 13.; + } + """ + self.tmpdir = tempfile.mkdtemp() + cdl_path = os.path.join(self.tmpdir, "tst.cdl") + nc_path = os.path.join(self.tmpdir, "tst.nc") + # Write CDL string into a temporary CDL file. + with open(cdl_path, "w") as f_out: + f_out.write(ref_cdl) + # Use ncgen to convert this into an actual (temporary) netCDF file. + command = "ncgen -o {} {}".format(nc_path, cdl_path) + check_call(command, shell=True) + # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + cubes = list(nc_load_cubes(nc_path)) + self.assertEqual(len(cubes), 1) + avs = cubes[0].ancillary_variables() + self.assertEqual(len(avs), 1) + expected = AncillaryVariable( + np.ma.array([11.0, 12.0, 13.0]), + long_name="refs", + var_name="my_av", + units="1", + attributes={"custom": "extra-attribute"}, + ) + self.assertEqual(avs[0], expected) + + def test_cell_measures(self): + # Note: using a CDL string as a test data reference, rather than a binary file. 
+ ref_cdl = """ + netcdf cm_attr { + dimensions: + axv = 3 ; + ayv = 2 ; + variables: + int64 qqv(ayv, axv) ; + qqv:long_name = "qq" ; + qqv:units = "1" ; + qqv:cell_measures = "area: my_areas" ; + int64 ayv(ayv) ; + ayv:units = "1" ; + ayv:long_name = "y" ; + int64 axv(axv) ; + axv:units = "1" ; + axv:long_name = "x" ; + double my_areas(ayv, axv) ; + my_areas:units = "m2" ; + my_areas:long_name = "standardised cell areas" ; + my_areas:custom = "extra-attribute"; + data: + axv = 11, 12, 13; + ayv = 21, 22; + my_areas = 110., 120., 130., 221., 231., 241.; + } + """ + self.tmpdir = tempfile.mkdtemp() + cdl_path = os.path.join(self.tmpdir, "tst.cdl") + nc_path = os.path.join(self.tmpdir, "tst.nc") + # Write CDL string into a temporary CDL file. + with open(cdl_path, "w") as f_out: + f_out.write(ref_cdl) + # Use ncgen to convert this into an actual (temporary) netCDF file. + command = "ncgen -o {} {}".format(nc_path, cdl_path) + check_call(command, shell=True) + # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + cubes = list(nc_load_cubes(nc_path)) + self.assertEqual(len(cubes), 1) + cms = cubes[0].cell_measures() + self.assertEqual(len(cms), 1) + expected = CellMeasure( + np.ma.array([[110.0, 120.0, 130.0], [221.0, 231.0, 241.0]]), + measure="area", + var_name="my_areas", + long_name="standardised cell areas", + units="m2", + attributes={"custom": "extra-attribute"}, + ) + self.assertEqual(cms[0], expected) + def test_deferred_loading(self): # Test exercising CF-netCDF deferred loading and deferred slicing. # shape (31, 161, 320)
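
Note (not part of the patch): a minimal end-to-end sketch of the behaviour this change enables, following the same CDL -> ncgen -> load pattern used by the new tests above. The file names and CDL content below are illustrative only; it assumes the `ncgen` utility from the netCDF tools is on the PATH.

    # Sketch: build a small netCDF file from CDL, load it, and inspect the
    # ancillary variables now attached to the cube by the new rules.
    import os
    import shutil
    import subprocess
    import tempfile

    from iris.fileformats.netcdf import load_cubes

    ref_cdl = """
    netcdf demo {
    dimensions:
        axv = 3 ;
    variables:
        int64 qqv(axv) ;
            qqv:long_name = "qq" ;
            qqv:units = "1" ;
            qqv:ancillary_variables = "my_av" ;
        int64 axv(axv) ;
            axv:units = "1" ;
            axv:long_name = "x" ;
        double my_av(axv) ;
            my_av:units = "1" ;
            my_av:long_name = "refs" ;
            my_av:custom = "extra-attribute" ;
    data:
        axv = 1, 2, 3;
        my_av = 11., 12., 13.;
    }
    """

    tmpdir = tempfile.mkdtemp()
    try:
        cdl_path = os.path.join(tmpdir, "demo.cdl")
        nc_path = os.path.join(tmpdir, "demo.nc")
        # Write the CDL text, then convert it to a real netCDF file with ncgen.
        with open(cdl_path, "w") as f_out:
            f_out.write(ref_cdl)
        subprocess.check_call(["ncgen", "-o", nc_path, cdl_path])

        # Load the file; with this change the ancillary variable is attached
        # to the cube, and its non-CF attributes (e.g. "custom") are preserved.
        (cube,) = list(load_cubes(nc_path))
        for av in cube.ancillary_variables():
            print(av.name(), av.units, av.attributes)
    finally:
        shutil.rmtree(tmpdir)

The same pattern applies to cell measures via cube.cell_measures(), as exercised by test_cell_measures above.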