From 4885e122e0eb9cdff76fb1a7303289d8e70eb0a6 Mon Sep 17 00:00:00 2001 From: Jon Seddon <17068361+jonseddon@users.noreply.github.com> Date: Tue, 2 Mar 2021 13:45:24 +0000 Subject: [PATCH] um_stash_source attribute improved handling (#4035) * Modified pyke rule * Tests added * Black and whatsnew * Include PR number * Remove latest.rst * Add what's new --- docs/iris/src/whatsnew/3.0.2.rst | 3 +- .../fileformats/_pyke_rules/fc_rules_cf.krb | 35 ++++- lib/iris/tests/test_netcdf.py | 141 ++++++++++++++++++ 3 files changed, 176 insertions(+), 3 deletions(-) diff --git a/docs/iris/src/whatsnew/3.0.2.rst b/docs/iris/src/whatsnew/3.0.2.rst index 1ba6271d47..90ce970326 100644 --- a/docs/iris/src/whatsnew/3.0.2.rst +++ b/docs/iris/src/whatsnew/3.0.2.rst @@ -18,7 +18,8 @@ This document explains the changes made to Iris for this release 🐛 **Bugs Fixed** - #. + #. `@jonseddon`_ handled a malformed ``um_stash_source`` CF variable attribute in + a netCDF file rather than raising a ``ValueError``. (:pull:`4035`) 📚 **Documentation** diff --git a/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb b/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb index d41ec6aa3e..8e41e6c762 100644 --- a/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb +++ b/lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb @@ -1022,8 +1022,7 @@ fc_attribute_ukmo__um_stash_source foreach check hasattr(engine.cf_var, 'ukmo__um_stash_source') or hasattr(engine.cf_var, 'um_stash_source') assert - python attr_value = getattr(engine.cf_var, 'um_stash_source', None) or getattr(engine.cf_var, 'ukmo__um_stash_source') - python engine.cube.attributes['STASH'] = pp.STASH.from_msi(attr_value) + python get_um_stash_source(engine) python engine.rule_triggered.add(rule.name) # @@ -1775,6 +1774,38 @@ fc_extras return cf_bounds_var, climatological + ################################################################################ + def get_um_stash_source(engine): + """ + If the CF variable has the um_stash_source or ukmo__um_stash_source + attributes and they are valid MSI STASH codes then the cube STASH + attribute is set with their value. If the attributes are not valid MSI + stash codes then the corresponding cube attribute is set with the value + from the variable and a warning is displayed. ukmo__um_stash_source + takes precedence over um_stash_source if both attributes exist. + + """ + attr = None + value = None + + for attr_name in ['ukmo__um_stash_source', 'um_stash_source']: + if hasattr(engine.cf_var, attr_name): + attr = attr_name + value = getattr(engine.cf_var, attr_name) + break + + if attr: + try: + stash_code = pp.STASH.from_msi(value) + except (TypeError, ValueError) as exc: + engine.cube.attributes[attr] = value + msg = (f'Unable to set attribute STASH as not a valid MSI ' + f'string "mXXsXXiXXX", got {value}') + warnings.warn(msg) + else: + engine.cube.attributes['STASH'] = stash_code + + ################################################################################ def reorder_bounds_data(bounds_data, cf_bounds_var, cf_coord_var): """ diff --git a/lib/iris/tests/test_netcdf.py b/lib/iris/tests/test_netcdf.py index 2d1b4a53d5..f1cbff2c5f 100644 --- a/lib/iris/tests/test_netcdf.py +++ b/lib/iris/tests/test_netcdf.py @@ -499,6 +499,147 @@ def test_default_units(self): cubes[0].cell_measure("areas").units, as_unit("unknown") ) + def test_um_stash_source(self): + """Test that um_stash_source is converted into a STASH code""" + # Note: using a CDL string as a test data reference, rather than a binary file. + ref_cdl = """ + netcdf cm_attr { + dimensions: + axv = 3 ; + ayv = 2 ; + variables: + int64 qqv(ayv, axv) ; + qqv:long_name = "qq" ; + qqv:ancillary_variables = "my_av" ; + qqv:cell_measures = "area: my_areas" ; + qqv:um_stash_source = "m01s02i003" ; + int64 ayv(ayv) ; + ayv:long_name = "y" ; + int64 axv(axv) ; + axv:units = "1" ; + axv:long_name = "x" ; + double my_av(axv) ; + my_av:long_name = "refs" ; + double my_areas(ayv, axv) ; + my_areas:long_name = "areas" ; + data: + axv = 11, 12, 13; + ayv = 21, 22; + my_areas = 110., 120., 130., 221., 231., 241.; + } + """ + self.tmpdir = tempfile.mkdtemp() + cdl_path = os.path.join(self.tmpdir, "tst.cdl") + nc_path = os.path.join(self.tmpdir, "tst.nc") + # Write CDL string into a temporary CDL file. + with open(cdl_path, "w") as f_out: + f_out.write(ref_cdl) + # Use ncgen to convert this into an actual (temporary) netCDF file. + command = "ncgen -o {} {}".format(nc_path, cdl_path) + check_call(command, shell=True) + # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + cubes = list(nc_load_cubes(nc_path)) + self.assertEqual(len(cubes), 1) + self.assertEqual( + cubes[0].attributes["STASH"], iris.fileformats.pp.STASH(1, 2, 3) + ) + + def test_ukmo__um_stash_source_priority(self): + """ + Test that ukmo__um_stash_source is converted into a STASH code with a + higher priority than um_stash_source. + """ + # Note: using a CDL string as a test data reference, rather than a binary file. + ref_cdl = """ + netcdf cm_attr { + dimensions: + axv = 3 ; + ayv = 2 ; + variables: + int64 qqv(ayv, axv) ; + qqv:long_name = "qq" ; + qqv:ancillary_variables = "my_av" ; + qqv:cell_measures = "area: my_areas" ; + qqv:um_stash_source = "m01s02i003" ; + qqv:ukmo__um_stash_source = "m09s08i007" ; + int64 ayv(ayv) ; + ayv:long_name = "y" ; + int64 axv(axv) ; + axv:units = "1" ; + axv:long_name = "x" ; + double my_av(axv) ; + my_av:long_name = "refs" ; + double my_areas(ayv, axv) ; + my_areas:long_name = "areas" ; + data: + axv = 11, 12, 13; + ayv = 21, 22; + my_areas = 110., 120., 130., 221., 231., 241.; + } + """ + self.tmpdir = tempfile.mkdtemp() + cdl_path = os.path.join(self.tmpdir, "tst.cdl") + nc_path = os.path.join(self.tmpdir, "tst.nc") + # Write CDL string into a temporary CDL file. + with open(cdl_path, "w") as f_out: + f_out.write(ref_cdl) + # Use ncgen to convert this into an actual (temporary) netCDF file. + command = "ncgen -o {} {}".format(nc_path, cdl_path) + check_call(command, shell=True) + # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + cubes = list(nc_load_cubes(nc_path)) + self.assertEqual(len(cubes), 1) + self.assertEqual( + cubes[0].attributes["STASH"], iris.fileformats.pp.STASH(9, 8, 7) + ) + + def test_bad_um_stash_source(self): + """Test that um_stash_source not in strict MSI form is kept""" + # Note: using a CDL string as a test data reference, rather than a binary file. + ref_cdl = """ + netcdf cm_attr { + dimensions: + axv = 3 ; + ayv = 2 ; + variables: + int64 qqv(ayv, axv) ; + qqv:long_name = "qq" ; + qqv:ancillary_variables = "my_av" ; + qqv:cell_measures = "area: my_areas" ; + qqv:um_stash_source = "10*m01s02i003" ; + int64 ayv(ayv) ; + ayv:long_name = "y" ; + int64 axv(axv) ; + axv:units = "1" ; + axv:long_name = "x" ; + double my_av(axv) ; + my_av:long_name = "refs" ; + double my_areas(ayv, axv) ; + my_areas:long_name = "areas" ; + data: + axv = 11, 12, 13; + ayv = 21, 22; + my_areas = 110., 120., 130., 221., 231., 241.; + } + """ + self.tmpdir = tempfile.mkdtemp() + cdl_path = os.path.join(self.tmpdir, "tst.cdl") + nc_path = os.path.join(self.tmpdir, "tst.nc") + # Write CDL string into a temporary CDL file. + with open(cdl_path, "w") as f_out: + f_out.write(ref_cdl) + # Use ncgen to convert this into an actual (temporary) netCDF file. + command = "ncgen -o {} {}".format(nc_path, cdl_path) + check_call(command, shell=True) + # Load with iris.fileformats.netcdf.load_cubes, and check expected content. + with self.assertWarns(UserWarning): + cubes = list(nc_load_cubes(nc_path)) + self.assertEqual(len(cubes), 1) + self.assertFalse(hasattr(cubes[0].attributes, "STASH")) + self.assertEqual( + cubes[0].attributes["um_stash_source"], "10*m01s02i003" + ) + def test_units(self): # Test exercising graceful cube and coordinate units loading. cube0, cube1 = sorted(