Skip to content

Commit

Permalink
PI-3473: Netcdf loading ancillary variables (SciTools#3556)
Browse files Browse the repository at this point in the history
* _regrid_area_weighted_array: Tweak variable order to near other use in code (SciTools#3571)

* Fix problems with export and echo command. (SciTools#3577)

* Pushdocs fix2 (SciTools#3580)

* Revert to single-line command for doctr invocation.

* Added script comment, partly to force Github respin.

* Added whatsnew for Black. (SciTools#3581)

* Fixes required due to the release of iris-grib v0.15.0 (SciTools#3582)

* Fix python-eccodes pin in travis (SciTools#3593)

* Netcdf load of ancillary vars: first working.
  • Loading branch information
pp-mo authored and abooton committed Jun 5, 2020
1 parent 738093f commit 363b9f8
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 21 deletions.
76 changes: 69 additions & 7 deletions lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,22 @@ fc_build_cell_measure
python engine.rule_triggered.add(rule.name)


#
# Context:
#     This rule will trigger for each ancillary_variable case specific fact.
#
# Purpose:
#     Add the ancillary variable to the cube.
#
#     For each asserted fact, looks up the matching CF ancillary variable in
#     the CF group of the data variable on the engine, delegates construction
#     to build_ancil_var (defined in fc_extras), and records that this rule
#     fired.
#
fc_build_ancil_var
    foreach
        facts_cf.ancillary_variable($var)
    assert
        python ancil_var = engine.cf_var.cf_group.ancillary_variables[$var]
        python build_ancil_var(engine, ancil_var)
        python engine.rule_triggered.add(rule.name)


#
# Context:
# This rule will trigger iff a CF latitude coordinate exists and
Expand Down Expand Up @@ -1941,36 +1957,37 @@ fc_extras
# Add it to the cube
cube.add_aux_coord(coord, data_dims)

# Update the coordinate to CF-netCDF variable mapping.
# Make a list with names, stored on the engine, so we can find them all later.
engine.provides['coordinates'].append((coord, cf_coord_var.cf_name))


################################################################################
def build_cell_measures(engine, cf_cm_attr, coord_name=None):
def build_cell_measures(engine, cf_cm_var):
"""Create a CellMeasure instance and add it to the cube."""
cf_var = engine.cf_var
cube = engine.cube
attributes = {}

# Get units
attr_units = get_attr_units(cf_cm_attr, attributes)
attr_units = get_attr_units(cf_cm_var, attributes)

data = _get_cf_var_data(cf_cm_attr, engine.filename)
# Get (lazy) content array
data = _get_cf_var_data(cf_cm_var, engine.filename)

# Determine the name of the dimension/s shared between the CF-netCDF data variable
# and the coordinate being built.
common_dims = [dim for dim in cf_cm_attr.dimensions
common_dims = [dim for dim in cf_cm_var.dimensions
if dim in cf_var.dimensions]
data_dims = None
if common_dims:
# Calculate the offset of each common dimension.
data_dims = [cf_var.dimensions.index(dim) for dim in common_dims]

# Determine the standard_name, long_name and var_name
standard_name, long_name, var_name = get_names(cf_cm_attr, coord_name, attributes)
standard_name, long_name, var_name = get_names(cf_cm_var, None, attributes)

# Obtain the cf_measure.
measure = cf_cm_attr.cf_measure
measure = cf_cm_var.cf_measure

# Create the CellMeasure
cell_measure = iris.coords.CellMeasure(data,
Expand All @@ -1984,6 +2001,51 @@ fc_extras
# Add it to the cube
cube.add_cell_measure(cell_measure, data_dims)

# Make a list with names, stored on the engine, so we can find them all later.
engine.provides['cell_measures'].append((cell_measure, cf_cm_var.cf_name))



################################################################################
def build_ancil_var(engine, cf_av_var):
    """
    Create an AncillaryVariable instance and add it to the cube.

    Builds an iris.coords.AncillaryVariable from the CF-netCDF variable
    'cf_av_var', attaches it to 'engine.cube' over the dimensions it shares
    with the data variable, and records the (object, netCDF-name) pair on
    'engine.provides' so the loader can post-process attributes later.
    """
    cube = engine.cube
    attributes = {}

    # Units, as recorded on the CF variable ('attributes' collects extras).
    attr_units = get_attr_units(cf_av_var, attributes)

    # Fetch the (lazy) content array.
    data = _get_cf_var_data(cf_av_var, engine.filename)

    # Map dimensions shared with the data variable onto cube dimensions.
    datavar_dims = engine.cf_var.dimensions
    shared_dims = [dim for dim in cf_av_var.dimensions if dim in datavar_dims]
    if shared_dims:
        data_dims = [datavar_dims.index(dim) for dim in shared_dims]
    else:
        # No shared dimensions : attach without a dimension mapping.
        data_dims = None

    # Determine the standard_name, long_name and var_name.
    standard_name, long_name, var_name = get_names(cf_av_var, None, attributes)

    av = iris.coords.AncillaryVariable(
        data,
        standard_name=standard_name,
        long_name=long_name,
        var_name=var_name,
        units=attr_units,
        attributes=attributes,
    )

    # Attach to the cube.
    cube.add_ancillary_variable(av, data_dims)

    # Make a list with names, stored on the engine, so we can find them all later.
    engine.provides['ancillary_variables'].append((av, cf_av_var.cf_name))



################################################################################
Expand Down
44 changes: 30 additions & 14 deletions lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,10 @@ def __setstate__(self, state):

def _assert_case_specific_facts(engine, cf, cf_group):
# Initialise pyke engine "provides" hooks.
# These are used to patch non-processed element attributes after rules activation.
engine.provides["coordinates"] = []
engine.provides["cell_measures"] = []
engine.provides["ancillary_variables"] = []

# Assert facts for CF coordinates.
for cf_name in cf_group.coordinates.keys():
Expand All @@ -479,6 +482,12 @@ def _assert_case_specific_facts(engine, cf, cf_group):
_PYKE_FACT_BASE, "cell_measure", (cf_name,)
)

# Assert facts for CF ancillary variables.
for cf_name in cf_group.ancillary_variables.keys():
engine.add_case_specific_fact(
_PYKE_FACT_BASE, "ancillary_variable", (cf_name,)
)

# Assert facts for CF grid_mappings.
for cf_name in cf_group.grid_mappings.keys():
engine.add_case_specific_fact(
Expand Down Expand Up @@ -597,31 +606,38 @@ def _load_cube(engine, cf, cf_var, filename):
# Run pyke inference engine with forward chaining rules.
engine.activate(_PYKE_RULE_BASE)

# Populate coordinate attributes with the untouched attributes from the
# associated CF-netCDF variable.
coordinates = engine.provides.get("coordinates", [])

# Having run the rules, now populate the attributes of all the cf elements with the
# "unused" attributes from the associated CF-netCDF variable.
# That is, all those that aren't CF reserved terms.
def attribute_predicate(item):
return item[0] not in _CF_ATTRS

for coord, cf_var_name in coordinates:
tmpvar = filter(
attribute_predicate, cf.cf_group[cf_var_name].cf_attrs_unused()
)
def add_unused_attributes(iris_object, cf_var):
tmpvar = filter(attribute_predicate, cf_var.cf_attrs_unused())
for attr_name, attr_value in tmpvar:
_set_attributes(coord.attributes, attr_name, attr_value)
_set_attributes(iris_object.attributes, attr_name, attr_value)

def fix_attributes_all_elements(role_name):
elements_and_names = engine.provides.get(role_name, [])

for iris_object, cf_var_name in elements_and_names:
add_unused_attributes(iris_object, cf.cf_group[cf_var_name])

# Populate the attributes of all coordinates, cell-measures and ancillary-vars.
fix_attributes_all_elements("coordinates")
fix_attributes_all_elements("ancillary_variables")
fix_attributes_all_elements("cell_measures")

tmpvar = filter(attribute_predicate, cf_var.cf_attrs_unused())
# Attach untouched attributes of the associated CF-netCDF data variable to
# the cube.
for attr_name, attr_value in tmpvar:
_set_attributes(cube.attributes, attr_name, attr_value)
# Also populate attributes of the top-level cube itself.
add_unused_attributes(cube, cf_var)

# Work out reference names for all the coords.
names = {
coord.var_name: coord.standard_name or coord.var_name or "unknown"
for coord in cube.coords()
}

# Add all the cube cell methods.
cube.cell_methods = [
iris.coords.CellMethod(
method=method.method,
Expand Down
108 changes: 108 additions & 0 deletions lib/iris/tests/test_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import os.path
import shutil
import stat
from subprocess import check_call
import tempfile
from unittest import mock

Expand All @@ -27,15 +28,24 @@
import iris.analysis.trajectory
import iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc as pyke_rules
import iris.fileformats.netcdf
from iris.fileformats.netcdf import load_cubes as nc_load_cubes
import iris.std_names
import iris.util
from iris.coords import AncillaryVariable, CellMeasure
import iris.coord_systems as icoord_systems
import iris.tests.stock as stock
from iris._lazy_data import is_lazy_data


@tests.skip_data
class TestNetCDFLoad(tests.IrisTest):
def setUp(self):
    # Path of a temporary directory, created on demand by tests that need
    # one; None means "nothing to clean up" (see tearDown).
    self.tmpdir = None

def tearDown(self):
    """Remove the temporary directory, if a test created one."""
    if self.tmpdir is None:
        return
    shutil.rmtree(self.tmpdir)

def test_monotonic(self):
cubes = iris.load(
tests.get_data_path(
Expand Down Expand Up @@ -240,6 +250,104 @@ def test_cell_methods(self):

self.assertCML(cubes, ("netcdf", "netcdf_cell_methods.cml"))

def test_ancillary_variables(self):
    """
    Check that a CF 'ancillary_variables' reference on the data variable
    loads as an iris AncillaryVariable, including its non-CF attributes.
    """
    # Note: using a CDL string as a test data reference, rather than a binary file.
    ref_cdl = """
        netcdf cm_attr {
            dimensions:
                axv = 3 ;
            variables:
                int64 qqv(axv) ;
                qqv:long_name = "qq" ;
                qqv:units = "1" ;
                qqv:ancillary_variables = "my_av" ;
                int64 axv(axv) ;
                axv:units = "1" ;
                axv:long_name = "x" ;
                double my_av(axv) ;
                my_av:units = "1" ;
                my_av:long_name = "refs" ;
                my_av:custom = "extra-attribute";
            data:
                axv = 1, 2, 3;
                my_av = 11., 12., 13.;
        }
        """
    self.tmpdir = tempfile.mkdtemp()
    cdl_path = os.path.join(self.tmpdir, "tst.cdl")
    nc_path = os.path.join(self.tmpdir, "tst.nc")
    # Write CDL string into a temporary CDL file.
    with open(cdl_path, "w") as f_out:
        f_out.write(ref_cdl)
    # Use ncgen to convert this into an actual (temporary) netCDF file.
    # Argument-list form (shell=False) is robust against spaces or shell
    # metacharacters in the temporary paths, unlike a formatted command string.
    check_call(["ncgen", "-o", nc_path, cdl_path])
    # Load with iris.fileformats.netcdf.load_cubes, and check expected content.
    cubes = list(nc_load_cubes(nc_path))
    self.assertEqual(len(cubes), 1)
    avs = cubes[0].ancillary_variables()
    self.assertEqual(len(avs), 1)
    expected = AncillaryVariable(
        np.ma.array([11.0, 12.0, 13.0]),
        long_name="refs",
        var_name="my_av",
        units="1",
        attributes={"custom": "extra-attribute"},
    )
    self.assertEqual(avs[0], expected)

def test_cell_measures(self):
    """
    Check that a CF 'cell_measures' reference on the data variable loads
    as an iris CellMeasure, including its non-CF attributes.
    """
    # Note: using a CDL string as a test data reference, rather than a binary file.
    ref_cdl = """
        netcdf cm_attr {
            dimensions:
                axv = 3 ;
                ayv = 2 ;
            variables:
                int64 qqv(ayv, axv) ;
                qqv:long_name = "qq" ;
                qqv:units = "1" ;
                qqv:cell_measures = "area: my_areas" ;
                int64 ayv(ayv) ;
                ayv:units = "1" ;
                ayv:long_name = "y" ;
                int64 axv(axv) ;
                axv:units = "1" ;
                axv:long_name = "x" ;
                double my_areas(ayv, axv) ;
                my_areas:units = "m2" ;
                my_areas:long_name = "standardised cell areas" ;
                my_areas:custom = "extra-attribute";
            data:
                axv = 11, 12, 13;
                ayv = 21, 22;
                my_areas = 110., 120., 130., 221., 231., 241.;
        }
        """
    self.tmpdir = tempfile.mkdtemp()
    cdl_path = os.path.join(self.tmpdir, "tst.cdl")
    nc_path = os.path.join(self.tmpdir, "tst.nc")
    # Write CDL string into a temporary CDL file.
    with open(cdl_path, "w") as f_out:
        f_out.write(ref_cdl)
    # Use ncgen to convert this into an actual (temporary) netCDF file.
    # Argument-list form (shell=False) is robust against spaces or shell
    # metacharacters in the temporary paths, unlike a formatted command string.
    check_call(["ncgen", "-o", nc_path, cdl_path])
    # Load with iris.fileformats.netcdf.load_cubes, and check expected content.
    cubes = list(nc_load_cubes(nc_path))
    self.assertEqual(len(cubes), 1)
    cms = cubes[0].cell_measures()
    self.assertEqual(len(cms), 1)
    expected = CellMeasure(
        np.ma.array([[110.0, 120.0, 130.0], [221.0, 231.0, 241.0]]),
        measure="area",
        var_name="my_areas",
        long_name="standardised cell areas",
        units="m2",
        attributes={"custom": "extra-attribute"},
    )
    self.assertEqual(cms[0], expected)

def test_deferred_loading(self):
# Test exercising CF-netCDF deferred loading and deferred slicing.
# shape (31, 161, 320)
Expand Down

0 comments on commit 363b9f8

Please sign in to comment.