Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide info on pp-field indices in the file for structured um loads. #2977

Merged
merged 5 commits into from
Oct 8, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
* The :class:`iris.fileformats.um.FieldCollation` objects, which are passed
into load callbacks when using
:func:`iris.fileformats.um.structured_um_loading`, now
have the additional properties:
:data:`iris.fileformats.um.FieldCollation.data_filepath` and
:data:`iris.fileformats.um.FieldCollation.data_field_indices`.
These provide the file locations of the original data fields, which are
otherwise lost in the structured loading process.
3 changes: 2 additions & 1 deletion lib/iris/fileformats/pp.py
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ def _pp_attribute_names(header_defn):
special_headers = list('_' + name for name in _SPECIAL_HEADERS)
extra_data = list(EXTRA_DATA.values())
special_attributes = ['_raw_header', 'raw_lbtim', 'raw_lbpack',
'boundary_packing']
'boundary_packing', '_index_in_structured_load_file']
return normal_headers + special_headers + extra_data + special_attributes


Expand Down Expand Up @@ -864,6 +864,7 @@ def __init__(self, header=None):
self.raw_lbtim = None
self.raw_lbpack = None
self.boundary_packing = None
self._index_in_structured_load_file = None
if header is not None:
self.raw_lbtim = header[self.HEADER_DICT['lbtim'][0]]
self.raw_lbpack = header[self.HEADER_DICT['lbpack'][0]]
Expand Down
5 changes: 2 additions & 3 deletions lib/iris/fileformats/um/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2014 - 2017, Met Office
# (C) British Crown Copyright 2014 - 2018, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -27,7 +27,6 @@

# Publish the FF-replacement features here, and include documentation.
from ._ff_replacement import um_to_pp, load_cubes, load_cubes_32bit_ieee
from ._fast_load import structured_um_loading
from ._fast_load_structured_fields import FieldCollation
from ._fast_load import structured_um_loading, FieldCollation
__all__ = ['um_to_pp', 'load_cubes', 'load_cubes_32bit_ieee',
'structured_um_loading', 'FieldCollation']
76 changes: 67 additions & 9 deletions lib/iris/fileformats/um/_fast_load.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2016 - 2017, Met Office
# (C) British Crown Copyright 2016 - 2018, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -40,19 +40,67 @@
import threading
import os.path

import numpy as np

# Be minimal about what we import from iris, to avoid circular imports.
# Below, other parts of iris.fileformats are accessed via deferred imports.
import iris
from iris.coords import DimCoord
from iris.cube import CubeList
from iris.exceptions import TranslationError

from iris.fileformats.um._fast_load_structured_fields import \
BasicFieldCollation, group_structured_fields

# Strings to identify the PP and FF file format handler specs.
_FF_SPEC_NAME = 'UM Fieldsfile'
_PP_SPEC_NAME = 'UM Post Processing file'


class FieldCollation(BasicFieldCollation):
    # A BasicFieldCollation which additionally records where its fields came
    # from : the path of the source file, and the index of each field in it.
    # Keeping these concepts in a subclass preserves the more abstract scope
    # of the original 'FieldCollation' class, now renamed
    # 'BasicFieldCollation'.

    def __init__(self, fields, filepath):
        """
        Args:

        * fields (iterable of :class:`iris.fileformats.pp.PPField`):
            The fields in the collation.

        * filepath (string):
            The path of the file the collation is loaded from.

        """
        super(FieldCollation, self).__init__(fields)
        self._load_filepath = filepath

    @property
    def data_filepath(self):
        """
        The path of the file the collation was loaded from, exactly as it
        was passed to the constructor.

        """
        return self._load_filepath

    @property
    def data_field_indices(self):
        """
        Field indices of the contained PPFields in the input file.

        This records the original file location of the individual data fields
        contained, within the input datafile.

        Returns:
            An integer array of shape `self.vector_dims_shape`.

        """
        # Fetch the target shape first : N.B. this access calculates (and
        # caches) the structure.
        shape = self.vector_dims_shape
        # Collect the index-in-file recorded on each contained field.
        file_indices = [field._index_in_structured_load_file
                        for field in self._fields]
        return np.array(file_indices, dtype=np.int64).reshape(shape)


def _basic_load_function(filename, pp_filter=None, **kwargs):
# The low-level 'fields from filename' loader.
#
Expand All @@ -71,8 +119,6 @@ def _basic_load_function(filename, pp_filter=None, **kwargs):
# Therefore, the actual loader will pass this as the 'pp_filter' keyword,
# when it is present.
# Additional load keywords are 'passed on' to the lower-level function.
from iris.fileformats.um._fast_load_structured_fields import \
group_structured_fields

# Helper function to select the correct fields loader call.
def _select_raw_fields_loader(fname):
Expand All @@ -98,10 +144,20 @@ def _select_raw_fields_loader(fname):
return loader

loader = _select_raw_fields_loader(filename)
fields = iter(field
for field in loader(filename, **kwargs)
if pp_filter is None or pp_filter(field))
return group_structured_fields(fields)

def iter_fields_decorated_with_load_indices(fields_iter):
for i_field, field in enumerate(fields_iter):
field._index_in_structured_load_file = i_field
yield field

fields = iter_fields_decorated_with_load_indices(
field
for field in loader(filename, **kwargs)
if pp_filter is None or pp_filter(field))

return group_structured_fields(fields,
collation_class=FieldCollation,
filepath=filename)


# Define the preferred order of candidate dimension coordinates, as used by
Expand Down Expand Up @@ -342,7 +398,9 @@ def structured_um_loading():
which is normally the whole of one phenomenon from a single input file.
In particular, the callback's "field" argument is a
:class:`~iris.fileformats.um.FieldCollation`, from which "field.fields"
gives a *list* of PPFields from which that cube was built.
gives a *list* of PPFields from which that cube was built, and the
properties "field.data_filepath" and "field.data_field_indices"
reference the original file locations of the cube data.
The code required is therefore different from a 'normal' callback.
For an example of this, see `this example in the Iris test code
<https://github.com/SciTools/iris/
Expand Down
27 changes: 18 additions & 9 deletions lib/iris/fileformats/um/_fast_load_structured_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
Code for fast loading of structured UM data.

This module defines which pp-field elements take part in structured loading,
and provides creation of :class:`FieldCollation` objects from lists of
and provides creation of :class:`BasicFieldCollation` objects from lists of
:class:`iris.fileformats.pp.PPField`.

"""
Expand All @@ -36,18 +36,18 @@
optimal_array_structure


class FieldCollation(object):
class BasicFieldCollation(object):
"""
An object representing a group of UM fields with array structure that can
be vectorized into a single cube.

For example:

Suppose we have a set of 28 fields repeating over 7 vertical levels for
each of 4 different data times. If a FieldCollation is created to contain
these, it can identify that this is a 4*7 regular array structure.
each of 4 different data times. If a BasicFieldCollation is created to
contain these, it can identify that this is a 4*7 regular array structure.

This FieldCollation will then have the following properties:
This BasicFieldCollation will then have the following properties:

* within 'element_arrays_and_dims' :
Element 'blev' has the array shape (7,) and dims of (1,).
Expand Down Expand Up @@ -259,7 +259,9 @@ def _um_collation_key_function(field):
# vector pseudo-level coordinate directly in the structured load analysis.


def group_structured_fields(field_iterator):
def group_structured_fields(field_iterator,
collation_class=BasicFieldCollation,
**collation_kwargs):
"""
Collect structured fields into identified groups whose fields can be
combined to form a single cube.
Expand All @@ -269,6 +271,13 @@ def group_structured_fields(field_iterator):
* field_iterator (iterator of :class:`iris.fileformats.pp.PPField`):
A source of PP or FF fields. N.B. order is significant.

Kwargs:

* collation_class (class):
Type of collation wrapper to create from each group of fields.
* collation_kwargs (dict):
Additional constructor keywords for collation creation.

The function sorts and collates on phenomenon-relevant metadata only,
defined as the field components: 'lbuser[3]' (stash), 'lbproc' (statistic),
'lbuser[6]' (model).
Expand All @@ -285,8 +294,8 @@ def group_structured_fields(field_iterator):
:func:`iris.fileformats.pp_load_rules._convert_time_coords`).

Returns:
A generator of FieldCollation objects, each of which contains a single
collated group from the input fields.
A generator of 'collation_class' objects, each of which contains a
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a small point perhaps, but it's not guaranteed that the generator will produce BasicFieldCollation objects, given that you can specify the collation_class in the constructor.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

single collated group from the input fields.

.. note::

Expand All @@ -297,4 +306,4 @@ def group_structured_fields(field_iterator):
"""
_fields = sorted(field_iterator, key=_um_collation_key_function)
for _, fields in itertools.groupby(_fields, _um_collation_key_function):
yield FieldCollation(tuple(fields))
yield collation_class(tuple(fields), **collation_kwargs)
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# (C) British Crown Copyright 2018, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""
Unit tests for the class
:class:`iris.fileformats.um._fast_load.FieldCollation`.

This only tests the additional functionality for recording file locations of
PPFields that make loaded cubes.
The original class is the baseclass of this, now renamed 'BasicFieldCollation'.

"""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# import iris tests first so that some things can be initialised
# before importing anything else.
import iris.tests as tests

import numpy as np

import iris

from iris.tests.integration.fast_load.test_fast_load import Mixin_FieldTest


class TestFastCallbackLocationInfo(Mixin_FieldTest, tests.IrisTest):
    """
    Check the file-location properties of the collations which are passed to
    a load callback.

    """
    do_fast_loads = True

    def setUp(self):
        # The parent class provides the field-creation and save utilities.
        super(TestFastCallbackLocationInfo, self).setUp()

        # Records of what the load callback gets called with.
        self.callback_collations = []
        self.callback_filepaths = []

        def fast_load_callback(cube, collation, filename):
            self.callback_collations.append(collation)
            self.callback_filepaths.append(filename)

        # Save a set of 8 test fields to a file, then load it back with the
        # recording callback attached.
        test_fields = self.fields(c_t='11112222', c_h='11221122',
                                  phn='01010101')
        self.test_filepath = self.save_fieldcubes(test_fields)
        iris.load(self.test_filepath, callback=fast_load_callback)

    def test_callback_collations_filepaths(self):
        collations = self.callback_collations
        self.assertEqual(len(collations), 2)
        # Every collation should report the path of the file we loaded.
        for i_collation in (0, 1):
            self.assertEqual(collations[i_collation].data_filepath,
                             self.test_filepath)

    def test_callback_collations_field_indices(self):
        # Expected index-in-file arrays, one for each collation in turn.
        expected_indices = ([[1, 3], [5, 7]],
                            [[0, 2], [4, 6]])
        for i_collation, expected in enumerate(expected_indices):
            # Index explicitly, so that a missing collation is an error.
            collation = self.callback_collations[i_collation]
            self.assertEqual(collation.data_field_indices.dtype, np.int64)
            self.assertArrayEqual(collation.data_field_indices, expected)


if __name__ == '__main__':
tests.main()
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
from cftime import datetime
import numpy as np

from iris.fileformats.um._fast_load_structured_fields import FieldCollation
from iris.fileformats.um._fast_load_structured_fields \
import BasicFieldCollation as FieldCollation
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pp-mo don't forget to also change the module docstring to also point to this new class.

Copy link
Member Author

@pp-mo pp-mo Oct 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, good spot !

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have also changed the BasicFieldCollation as FieldCollation usage in this file, because I think that is unreasonably confusing.

import iris.fileformats.pp


Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2014 - 2015, Met Office
# (C) British Crown Copyright 2014 - 2018, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -71,9 +71,7 @@ def _dummy_fields_iter(self, stashes=None, models=None, lbprocs=None):

def _group_result(self, fields):
# Run the testee, but returning just the groups (not FieldCollations).
with mock.patch('iris.fileformats.um._fast_load_structured_fields.'
'FieldCollation', new=lambda args: args):
result = list(group_structured_fields(fields))
result = list(group_structured_fields(fields, collation_class=tuple))
return result

def _test_fields(self, item):
Expand Down