SciTools · DPeterK · Oct 8, 2018 · Mar 13, 2018 · Mar 13, 2018 · Mar 13, 2018
diff --git a/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Apr-12_fast_load_file_indices.txt b/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Apr-12_fast_load_file_indices.txt
@@ -0,0 +1,8 @@
+* The :class:`iris.fileformats.um.FieldCollation` objects, which are passed
+  into load callbacks when using
+  :func:`iris.fileformats.um.structured_um_loading`, now
+  have the additional properties
+  :data:`iris.fileformats.um.FieldCollation.data_filepath` and
+  :data:`iris.fileformats.um.FieldCollation.data_field_indices`.
+  These provide the file locations of the original data fields, which are
+  otherwise lost in the structured loading process.
diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py
@@ -831,7 +831,7 @@ def _pp_attribute_names(header_defn):
     special_headers = list('_' + name for name in _SPECIAL_HEADERS)
     extra_data = list(EXTRA_DATA.values())
     special_attributes = ['_raw_header', 'raw_lbtim', 'raw_lbpack',
-                          'boundary_packing']
+                          'boundary_packing', '_index_in_structured_load_file']
     return normal_headers + special_headers + extra_data + special_attributes
 
 
@@ -864,6 +864,7 @@ def __init__(self, header=None):
         self.raw_lbtim = None
         self.raw_lbpack = None
         self.boundary_packing = None
+        self._index_in_structured_load_file = None
         if header is not None:
             self.raw_lbtim = header[self.HEADER_DICT['lbtim'][0]]
             self.raw_lbpack = header[self.HEADER_DICT['lbpack'][0]]

diff --git a/lib/iris/fileformats/um/__init__.py b/lib/iris/fileformats/um/__init__.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2014 - 2017, Met Office
+# (C) British Crown Copyright 2014 - 2018, Met Office
 #
 # This file is part of Iris.
 #
@@ -27,7 +27,6 @@
 
 # Publish the FF-replacement features here, and include documentation.
 from ._ff_replacement import um_to_pp, load_cubes, load_cubes_32bit_ieee
-from ._fast_load import structured_um_loading
-from ._fast_load_structured_fields import FieldCollation
+from ._fast_load import structured_um_loading, FieldCollation
 __all__ = ['um_to_pp', 'load_cubes', 'load_cubes_32bit_ieee',
            'structured_um_loading', 'FieldCollation']
diff --git a/lib/iris/fileformats/um/_fast_load.py b/lib/iris/fileformats/um/_fast_load.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2016 - 2017, Met Office
+# (C) British Crown Copyright 2016 - 2018, Met Office
 #
 # This file is part of Iris.
 #
@@ -40,19 +40,67 @@
 import threading
 import os.path
 
+import numpy as np
+
 # Be minimal about what we import from iris, to avoid circular imports.
 # Below, other parts of iris.fileformats are accessed via deferred imports.
 import iris
 from iris.coords import DimCoord
 from iris.cube import CubeList
 from iris.exceptions import TranslationError
-
+from iris.fileformats.um._fast_load_structured_fields import \
+    BasicFieldCollation, group_structured_fields
 
 # Strings to identify the PP and FF file format handler specs.
 _FF_SPEC_NAME = 'UM Fieldsfile'
 _PP_SPEC_NAME = 'UM Post Processing file'
 
 
+class FieldCollation(BasicFieldCollation):
+    # This class specialises the BasicFieldCollation by adding the file-index
+    # and file-path concepts.
+    # This preserves the more abstract scope of the original 'FieldCollation'
+    # class, now renamed 'BasicFieldCollation'.
+
+    def __init__(self, fields, filepath):
+        """
+        Args:
+
+        * fields (iterable of :class:`iris.fileformats.pp.PPField`):
+            The fields in the collation.
+
+        * filepath (string):
+            The path of the file the collation is loaded from.
+
+        """
+        super(FieldCollation, self).__init__(fields)
+        self._load_filepath = filepath
+
+    @property
+    def data_filepath(self):
+        return self._load_filepath
+
+    @property
+    def data_field_indices(self):
+        """
+        Field indices of the contained PPFields in the input file.
+
+        This records the original file location, within the input datafile,
+        of each of the individual data fields contained.
+
+        Returns:
+            An integer array of shape `self.vector_dims_shape`.
+
+        """
+        # Get shape :  N.B. this calculates (and caches) the structure.
+        vector_dims_shape = self.vector_dims_shape
+        # Get index-in-file of each contained field.
+        indices = np.array([field._index_in_structured_load_file
+                            for field in self._fields],
+                           dtype=np.int64)
+        return indices.reshape(vector_dims_shape)
+
+
 def _basic_load_function(filename, pp_filter=None, **kwargs):
     # The low-level 'fields from filename' loader.
     #
@@ -71,8 +119,6 @@ def _basic_load_function(filename, pp_filter=None, **kwargs):
     # Therefore, the actual loader will pass this as the 'pp_filter' keyword,
     # when it is present.
     # Additional load keywords are 'passed on' to the lower-level function.
-    from iris.fileformats.um._fast_load_structured_fields import \
-        group_structured_fields
 
     # Helper function to select the correct fields loader call.
     def _select_raw_fields_loader(fname):
@@ -98,10 +144,20 @@ def _select_raw_fields_loader(fname):
         return loader
 
     loader = _select_raw_fields_loader(filename)
-    fields = iter(field
-                  for field in loader(filename, **kwargs)
-                  if pp_filter is None or pp_filter(field))
-    return group_structured_fields(fields)
+
+    def iter_fields_decorated_with_load_indices(fields_iter):
+        for i_field, field in enumerate(fields_iter):
+            field._index_in_structured_load_file = i_field
+            yield field
+
+    fields = iter_fields_decorated_with_load_indices(
+        field
+        for field in loader(filename, **kwargs)
+        if pp_filter is None or pp_filter(field))
+
+    return group_structured_fields(fields,
+                                   collation_class=FieldCollation,
+                                   filepath=filename)
 
 
 # Define the preferred order of candidate dimension coordinates, as used by
@@ -342,7 +398,9 @@ def structured_um_loading():
         which is normally the whole of one phenomenon from a single input file.
         In particular, the callback's "field" argument is a
         :class:`~iris.fileformats.um.FieldCollation`, from which "field.fields"
-        gives a *list* of PPFields from which that cube was built.
+        gives a *list* of PPFields from which that cube was built, and the
+        properties "field.data_filepath" and "field.data_field_indices"
+        reference the original file locations of the cube data.
         The code required is therefore different from a 'normal' callback.
         For an example of this, see `this example in the Iris test code
         <https://github.com/SciTools/iris/

diff --git a/lib/iris/fileformats/um/_fast_load_structured_fields.py b/lib/iris/fileformats/um/_fast_load_structured_fields.py
@@ -18,7 +18,7 @@
 Code for fast loading of structured UM data.
 
 This module defines which pp-field elements take part in structured loading,
-and provides creation of :class:`FieldCollation` objects from lists of
+and provides creation of :class:`BasicFieldCollation` objects from lists of
 :class:`iris.fileformats.pp.PPField`.
 
 """
@@ -36,18 +36,18 @@
     optimal_array_structure
 
 
-class FieldCollation(object):
+class BasicFieldCollation(object):
     """
     An object representing a group of UM fields with array structure that can
     be vectorized into a single cube.
 
     For example:
 
     Suppose we have a set of 28 fields repeating over 7 vertical levels for
-    each of 4 different data times.  If a FieldCollation is created to contain
-    these, it can identify that this is a 4*7 regular array structure.
+    each of 4 different data times.  If a BasicFieldCollation is created to
+    contain these, it can identify that this is a 4*7 regular array structure.
 
-    This FieldCollation will then have the following properties:
+    This BasicFieldCollation will then have the following properties:
 
     * within 'element_arrays_and_dims' :
         Element 'blev' have the array shape (7,) and dims of (1,).
@@ -259,7 +259,9 @@ def _um_collation_key_function(field):
     # vector pseudo-level coordinate directly in the structured load analysis.
 
 
-def group_structured_fields(field_iterator):
+def group_structured_fields(field_iterator,
+                            collation_class=BasicFieldCollation,
+                            **collation_kwargs):
     """
     Collect structured fields into identified groups whose fields can be
     combined to form a single cube.
@@ -269,6 +271,13 @@ def group_structured_fields(field_iterator):
     * field_iterator (iterator of :class:`iris.fileformats.pp.PPField`):
         A source of PP or FF fields.  N.B. order is significant.
 
+    Kwargs:
+
+    * collation_class (class):
+        Type of collation wrapper to create from each group of fields.
+    * collation_kwargs (dict):
+        Additional constructor keywords for collation creation.
+
     The function sorts and collates on phenomenon-relevant metadata only,
     defined as the field components: 'lbuser[3]' (stash), 'lbproc' (statistic),
     'lbuser[6]' (model).
@@ -285,8 +294,8 @@ def group_structured_fields(field_iterator):
        :func:`iris.fileformats.pp_load_rules._convert_time_coords`).
 
     Returns:
-        A generator of FieldCollation objects, each of which contains a single
-        collated group from the input fields.
+        A generator of 'collation_class' objects, each of which contains a
+        single collated group from the input fields.
 
     .. note::
 
@@ -297,4 +306,4 @@ def group_structured_fields(field_iterator):
     """
     _fields = sorted(field_iterator, key=_um_collation_key_function)
     for _, fields in itertools.groupby(_fields, _um_collation_key_function):
-        yield FieldCollation(tuple(fields))
+        yield collation_class(tuple(fields), **collation_kwargs)
diff --git a/lib/iris/tests/unit/fileformats/um/fast_load/test_FieldCollation.py b/lib/iris/tests/unit/fileformats/um/fast_load/test_FieldCollation.py
@@ -0,0 +1,82 @@
+# (C) British Crown Copyright 2018, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris.  If not, see <http://www.gnu.org/licenses/>.
+"""
+Unit tests for the class
+:class:`iris.fileformats.um._fast_load.FieldCollation`.
+
+This only tests the additional functionality for recording file locations of
+PPFields that make loaded cubes.
+The original class is the baseclass of this, now renamed 'BasicFieldCollation'.
+
+"""
+
+from __future__ import (absolute_import, division, print_function)
+from six.moves import (filter, input, map, range, zip)  # noqa
+
+# import iris tests first so that some things can be initialised
+# before importing anything else.
+import iris.tests as tests
+
+import numpy as np
+
+import iris
+
+from iris.tests.integration.fast_load.test_fast_load import Mixin_FieldTest
+
+
+class TestFastCallbackLocationInfo(Mixin_FieldTest, tests.IrisTest):
+    do_fast_loads = True
+
+    def setUp(self):
+        # Call parent setup.
+        super(TestFastCallbackLocationInfo, self).setUp()
+
+        # Create a basic load test case.
+        self.callback_collations = []
+        self.callback_filepaths = []
+
+        def fast_load_callback(cube, collation, filename):
+            self.callback_collations.append(collation)
+            self.callback_filepaths.append(filename)
+
+        flds = self.fields(c_t='11112222', c_h='11221122', phn='01010101')
+        self.test_filepath = self.save_fieldcubes(flds)
+        iris.load(self.test_filepath, callback=fast_load_callback)
+
+    def test_callback_collations_filepaths(self):
+        self.assertEqual(len(self.callback_collations), 2)
+        self.assertEqual(self.callback_collations[0].data_filepath,
+                         self.test_filepath)
+        self.assertEqual(self.callback_collations[1].data_filepath,
+                         self.test_filepath)
+
+    def test_callback_collations_field_indices(self):
+        self.assertEqual(
+            self.callback_collations[0].data_field_indices.dtype, np.int64)
+        self.assertArrayEqual(
+            self.callback_collations[0].data_field_indices,
+            [[1, 3], [5, 7]])
+
+        self.assertEqual(
+            self.callback_collations[1].data_field_indices.dtype, np.int64)
+        self.assertArrayEqual(
+            self.callback_collations[1].data_field_indices,
+            [[0, 2], [4, 6]])
+
+
+if __name__ == '__main__':
+    tests.main()
diff --git a/..._structured_fields/test_FieldCollation.py → ...ctured_fields/test_BasicFieldCollation.py b/..._structured_fields/test_FieldCollation.py → ...ctured_fields/test_BasicFieldCollation.py
@@ -31,7 +31,8 @@
 from cftime import datetime
 import numpy as np
 
-from iris.fileformats.um._fast_load_structured_fields import FieldCollation
+from iris.fileformats.um._fast_load_structured_fields \
+    import BasicFieldCollation as FieldCollation
 import iris.fileformats.pp
 
 

diff --git a/...ris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py b/...ris/tests/unit/fileformats/um/fast_load_structured_fields/test_group_structured_fields.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2014 - 2015, Met Office
+# (C) British Crown Copyright 2014 - 2018, Met Office
 #
 # This file is part of Iris.
 #
@@ -71,9 +71,7 @@ def _dummy_fields_iter(self, stashes=None, models=None, lbprocs=None):
 
     def _group_result(self, fields):
         # Run the testee, but returning just the groups (not FieldCollations).
-        with mock.patch('iris.fileformats.um._fast_load_structured_fields.'
-                        'FieldCollation', new=lambda args: args):
-            result = list(group_structured_fields(fields))
+        result = list(group_structured_fields(fields, collation_class=tuple))
         return result
 
     def _test_fields(self, item):