Skip to content

Commit

Permalink
PI-3473: Netcdf loading ancillary variables (SciTools#3556)
Browse files Browse the repository at this point in the history
* _regrid_area_weighted_array: Tweak variable order to near other use in code (SciTools#3571)

* Fix problems with export and echo command. (SciTools#3577)

* Pushdocs fix2 (SciTools#3580)

* Revert to single-line command for doctr invocation.

* Added script comment, partly to force Github respin.

* Added whatsnew for Black. (SciTools#3581)

* Fixes required due to the release of iris-grib v0.15.0 (SciTools#3582)

* Fix python-eccodes pin in travis (SciTools#3593)

* Netcdf load of ancillary vars: first working.
  • Loading branch information
pp-mo authored and abooton committed Jun 5, 2020
1 parent 738093f commit 363b9f8
Show file tree
Hide file tree
Showing 3 changed files with 207 additions and 21 deletions.
76 changes: 69 additions & 7 deletions lib/iris/fileformats/_pyke_rules/fc_rules_cf.krb
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,22 @@ fc_build_cell_measure
python engine.rule_triggered.add(rule.name)


#
# Context:
#     This rule will trigger for each ancillary_variable case specific fact.
#
# Purpose:
#     Add the ancillary variable to the cube.
#
#     For each asserted fact, looks up the matching CF ancillary variable in
#     the CF group of the data variable on the engine, delegates construction
#     to build_ancil_var (defined in fc_extras), and records that this rule
#     fired.
#
fc_build_ancil_var
    foreach
        facts_cf.ancillary_variable($var)
    assert
        python ancil_var = engine.cf_var.cf_group.ancillary_variables[$var]
        python build_ancil_var(engine, ancil_var)
        python engine.rule_triggered.add(rule.name)


#
# Context:
# This rule will trigger iff a CF latitude coordinate exists and
Expand Down Expand Up @@ -1941,36 +1957,37 @@ fc_extras
# Add it to the cube
cube.add_aux_coord(coord, data_dims)

# Update the coordinate to CF-netCDF variable mapping.
# Make a list with names, stored on the engine, so we can find them all later.
engine.provides['coordinates'].append((coord, cf_coord_var.cf_name))


################################################################################
def build_cell_measures(engine, cf_cm_attr, coord_name=None):
def build_cell_measures(engine, cf_cm_var):
"""Create a CellMeasure instance and add it to the cube."""
cf_var = engine.cf_var
cube = engine.cube
attributes = {}

# Get units
attr_units = get_attr_units(cf_cm_attr, attributes)
attr_units = get_attr_units(cf_cm_var, attributes)

data = _get_cf_var_data(cf_cm_attr, engine.filename)
# Get (lazy) content array
data = _get_cf_var_data(cf_cm_var, engine.filename)

# Determine the name of the dimension/s shared between the CF-netCDF data variable
# and the coordinate being built.
common_dims = [dim for dim in cf_cm_attr.dimensions
common_dims = [dim for dim in cf_cm_var.dimensions
if dim in cf_var.dimensions]
data_dims = None
if common_dims:
# Calculate the offset of each common dimension.
data_dims = [cf_var.dimensions.index(dim) for dim in common_dims]

# Determine the standard_name, long_name and var_name
standard_name, long_name, var_name = get_names(cf_cm_attr, coord_name, attributes)
standard_name, long_name, var_name = get_names(cf_cm_var, None, attributes)

# Obtain the cf_measure.
measure = cf_cm_attr.cf_measure
measure = cf_cm_var.cf_measure

# Create the CellMeasure
cell_measure = iris.coords.CellMeasure(data,
Expand All @@ -1984,6 +2001,51 @@ fc_extras
# Add it to the cube
cube.add_cell_measure(cell_measure, data_dims)

# Make a list with names, stored on the engine, so we can find them all later.
engine.provides['cell_measures'].append((cell_measure, cf_cm_var.cf_name))



################################################################################
def build_ancil_var(engine, cf_av_var):
    """
    Create an AncillaryVariable instance and add it to the cube.

    Builds an iris.coords.AncillaryVariable from the CF-netCDF variable
    'cf_av_var', attaches it to 'engine.cube' over the dimensions it shares
    with the data variable, and records the (object, netCDF-name) pair on
    'engine.provides' so the loader can post-process attributes later.
    """
    cube = engine.cube
    attributes = {}

    # Units, as recorded on the CF variable ('attributes' collects extras).
    attr_units = get_attr_units(cf_av_var, attributes)

    # Fetch the (lazy) content array.
    data = _get_cf_var_data(cf_av_var, engine.filename)

    # Map dimensions shared with the data variable onto cube dimensions.
    datavar_dims = engine.cf_var.dimensions
    shared_dims = [dim for dim in cf_av_var.dimensions if dim in datavar_dims]
    if shared_dims:
        data_dims = [datavar_dims.index(dim) for dim in shared_dims]
    else:
        # No shared dimensions : attach without a dimension mapping.
        data_dims = None

    # Determine the standard_name, long_name and var_name.
    standard_name, long_name, var_name = get_names(cf_av_var, None, attributes)

    av = iris.coords.AncillaryVariable(
        data,
        standard_name=standard_name,
        long_name=long_name,
        var_name=var_name,
        units=attr_units,
        attributes=attributes,
    )

    # Attach to the cube.
    cube.add_ancillary_variable(av, data_dims)

    # Make a list with names, stored on the engine, so we can find them all later.
    engine.provides['ancillary_variables'].append((av, cf_av_var.cf_name))



################################################################################
Expand Down
44 changes: 30 additions & 14 deletions lib/iris/fileformats/netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,10 @@ def __setstate__(self, state):

def _assert_case_specific_facts(engine, cf, cf_group):
# Initialise pyke engine "provides" hooks.
# These are used to patch non-processed element attributes after rules activation.
engine.provides["coordinates"] = []
engine.provides["cell_measures"] = []
engine.provides["ancillary_variables"] = []

# Assert facts for CF coordinates.
for cf_name in cf_group.coordinates.keys():
Expand All @@ -479,6 +482,12 @@ def _assert_case_specific_facts(engine, cf, cf_group):
_PYKE_FACT_BASE, "cell_measure", (cf_name,)
)

# Assert facts for CF ancillary variables.
for cf_name in cf_group.ancillary_variables.keys():
engine.add_case_specific_fact(
_PYKE_FACT_BASE, "ancillary_variable", (cf_name,)
)

# Assert facts for CF grid_mappings.
for cf_name in cf_group.grid_mappings.keys():
engine.add_case_specific_fact(
Expand Down Expand Up @@ -597,31 +606,38 @@ def _load_cube(engine, cf, cf_var, filename):
# Run pyke inference engine with forward chaining rules.
engine.activate(_PYKE_RULE_BASE)

# Populate coordinate attributes with the untouched attributes from the
# associated CF-netCDF variable.
coordinates = engine.provides.get("coordinates", [])

# Having run the rules, now populate the attributes of all the cf elements with the
# "unused" attributes from the associated CF-netCDF variable.
# That is, all those that aren't CF reserved terms.
def attribute_predicate(item):
return item[0] not in _CF_ATTRS

for coord, cf_var_name in coordinates:
tmpvar = filter(
attribute_predicate, cf.cf_group[cf_var_name].cf_attrs_unused()
)
def add_unused_attributes(iris_object, cf_var):
tmpvar = filter(attribute_predicate, cf_var.cf_attrs_unused())
for attr_name, attr_value in tmpvar:
_set_attributes(coord.attributes, attr_name, attr_value)
_set_attributes(iris_object.attributes, attr_name, attr_value)

def fix_attributes_all_elements(role_name):
elements_and_names = engine.provides.get(role_name, [])

for iris_object, cf_var_name in elements_and_names:
add_unused_attributes(iris_object, cf.cf_group[cf_var_name])

# Populate the attributes of all coordinates, cell-measures and ancillary-vars.
fix_attributes_all_elements("coordinates")
fix_attributes_all_elements("ancillary_variables")
fix_attributes_all_elements("cell_measures")

tmpvar = filter(attribute_predicate, cf_var.cf_attrs_unused())
# Attach untouched attributes of the associated CF-netCDF data variable to
# the cube.
for attr_name, attr_value in tmpvar:
_set_attributes(cube.attributes, attr_name, attr_value)
# Also populate attributes of the top-level cube itself.
add_unused_attributes(cube, cf_var)

# Work out reference names for all the coords.
names = {
coord.var_name: coord.standard_name or coord.var_name or "unknown"
for coord in cube.coords()
}

# Add all the cube cell methods.
cube.cell_methods = [
iris.coords.CellMethod(
method=method.method,
Expand Down
108 changes: 108 additions & 0 deletions lib/iris/tests/test_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import os.path
import shutil
import stat
from subprocess import check_call
import tempfile
from unittest import mock

Expand All @@ -27,15 +28,24 @@
import iris.analysis.trajectory
import iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc as pyke_rules
import iris.fileformats.netcdf
from iris.fileformats.netcdf import load_cubes as nc_load_cubes
import iris.std_names
import iris.util
from iris.coords import AncillaryVariable, CellMeasure
import iris.coord_systems as icoord_systems
import iris.tests.stock as stock
from iris._lazy_data import is_lazy_data


@tests.skip_data
class TestNetCDFLoad(tests.IrisTest):
def setUp(self):
    # Path of a temporary directory, created on demand by tests that need
    # one; None means "nothing to clean up" (see tearDown).
    self.tmpdir = None

def tearDown(self):
    """Remove the temporary directory, if a test created one."""
    if self.tmpdir is None:
        return
    shutil.rmtree(self.tmpdir)

def test_monotonic(self):
cubes = iris.load(
tests.get_data_path(
Expand Down Expand Up @@ -240,6 +250,104 @@ def test_cell_methods(self):

self.assertCML(cubes, ("netcdf", "netcdf_cell_methods.cml"))

def test_ancillary_variables(self):
    """
    Check that a CF 'ancillary_variables' reference on the data variable
    loads as an iris AncillaryVariable, including its non-CF attributes.
    """
    # Note: using a CDL string as a test data reference, rather than a binary file.
    ref_cdl = """
        netcdf cm_attr {
            dimensions:
                axv = 3 ;
            variables:
                int64 qqv(axv) ;
                qqv:long_name = "qq" ;
                qqv:units = "1" ;
                qqv:ancillary_variables = "my_av" ;
                int64 axv(axv) ;
                axv:units = "1" ;
                axv:long_name = "x" ;
                double my_av(axv) ;
                my_av:units = "1" ;
                my_av:long_name = "refs" ;
                my_av:custom = "extra-attribute";
            data:
                axv = 1, 2, 3;
                my_av = 11., 12., 13.;
        }
        """
    self.tmpdir = tempfile.mkdtemp()
    cdl_path = os.path.join(self.tmpdir, "tst.cdl")
    nc_path = os.path.join(self.tmpdir, "tst.nc")
    # Write CDL string into a temporary CDL file.
    with open(cdl_path, "w") as f_out:
        f_out.write(ref_cdl)
    # Use ncgen to convert this into an actual (temporary) netCDF file.
    # Argument-list form (shell=False) is robust against spaces or shell
    # metacharacters in the temporary paths, unlike a formatted command string.
    check_call(["ncgen", "-o", nc_path, cdl_path])
    # Load with iris.fileformats.netcdf.load_cubes, and check expected content.
    cubes = list(nc_load_cubes(nc_path))
    self.assertEqual(len(cubes), 1)
    avs = cubes[0].ancillary_variables()
    self.assertEqual(len(avs), 1)
    expected = AncillaryVariable(
        np.ma.array([11.0, 12.0, 13.0]),
        long_name="refs",
        var_name="my_av",
        units="1",
        attributes={"custom": "extra-attribute"},
    )
    self.assertEqual(avs[0], expected)

def test_cell_measures(self):
    """
    Check that a CF 'cell_measures' reference on the data variable loads
    as an iris CellMeasure, including its non-CF attributes.
    """
    # Note: using a CDL string as a test data reference, rather than a binary file.
    ref_cdl = """
        netcdf cm_attr {
            dimensions:
                axv = 3 ;
                ayv = 2 ;
            variables:
                int64 qqv(ayv, axv) ;
                qqv:long_name = "qq" ;
                qqv:units = "1" ;
                qqv:cell_measures = "area: my_areas" ;
                int64 ayv(ayv) ;
                ayv:units = "1" ;
                ayv:long_name = "y" ;
                int64 axv(axv) ;
                axv:units = "1" ;
                axv:long_name = "x" ;
                double my_areas(ayv, axv) ;
                my_areas:units = "m2" ;
                my_areas:long_name = "standardised cell areas" ;
                my_areas:custom = "extra-attribute";
            data:
                axv = 11, 12, 13;
                ayv = 21, 22;
                my_areas = 110., 120., 130., 221., 231., 241.;
        }
        """
    self.tmpdir = tempfile.mkdtemp()
    cdl_path = os.path.join(self.tmpdir, "tst.cdl")
    nc_path = os.path.join(self.tmpdir, "tst.nc")
    # Write CDL string into a temporary CDL file.
    with open(cdl_path, "w") as f_out:
        f_out.write(ref_cdl)
    # Use ncgen to convert this into an actual (temporary) netCDF file.
    # Argument-list form (shell=False) is robust against spaces or shell
    # metacharacters in the temporary paths, unlike a formatted command string.
    check_call(["ncgen", "-o", nc_path, cdl_path])
    # Load with iris.fileformats.netcdf.load_cubes, and check expected content.
    cubes = list(nc_load_cubes(nc_path))
    self.assertEqual(len(cubes), 1)
    cms = cubes[0].cell_measures()
    self.assertEqual(len(cms), 1)
    expected = CellMeasure(
        np.ma.array([[110.0, 120.0, 130.0], [221.0, 231.0, 241.0]]),
        measure="area",
        var_name="my_areas",
        long_name="standardised cell areas",
        units="m2",
        attributes={"custom": "extra-attribute"},
    )
    self.assertEqual(cms[0], expected)

def test_deferred_loading(self):
# Test exercising CF-netCDF deferred loading and deferred slicing.
# shape (31, 161, 320)
Expand Down

0 comments on commit 363b9f8

Please sign in to comment.