SciTools · djkirkham · Mar 9, 2018 · Mar 7, 2018 · Mar 7, 2018 · Mar 8, 2018
diff --git a/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Mar-08_co_realise_cubes.txt b/docs/iris/src/whatsnew/contributions_2.1/newfeature_2018-Mar-08_co_realise_cubes.txt
@@ -0,0 +1,3 @@
+* Added new function :func:`iris.co_realise_cubes` to compute multiple lazy
+  values in a single operation, avoiding repeated re-loading of data or
+  re-calculation of expressions.
diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py
@@ -112,6 +112,7 @@ def callback(cube, field, filename):
 from iris._deprecation import IrisDeprecation, warn_deprecated
 import iris.fileformats
 import iris.io
+from iris._lazy_data import co_realise_cubes
 
 
 try:
@@ -127,7 +128,7 @@ def callback(cube, field, filename):
 __all__ = ['load', 'load_cube', 'load_cubes', 'load_raw',
            'save', 'Constraint', 'AttributeConstraint', 'sample_data_path',
            'site_configuration', 'Future', 'FUTURE',
-           'IrisDeprecation']
+           'IrisDeprecation', 'co_realise_cubes']
 
 
 Constraint = iris._constraints.Constraint

diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py
@@ -1,4 +1,4 @@
-# (C) British Crown Copyright 2017, Met Office
+# (C) British Crown Copyright 2017 - 2018, Met Office
 #
 # This file is part of Iris.
 #
@@ -102,6 +102,33 @@ def as_lazy_data(data, chunks=None, asarray=False):
     return data
 
 
+def _co_realise_lazy_arrays(arrays):
+    """
+    Compute multiple lazy arrays and return a list of real values.
+
+    All the arrays are computed together, so they can share results for common
+    graph elements.
+
+    Also converts any MaskedConstants returned into masked arrays, to ensure
+    that all return values are writeable NumPy array objects.
+
+    """
+    computed_arrays = da.compute(*arrays)
+    results = []
+    for lazy_in, real_out in zip(arrays, computed_arrays):
+        real_out = np.asanyarray(real_out)
+        if isinstance(real_out, ma.core.MaskedConstant):
+            # Convert any masked constants into NumPy masked arrays :  In some
+            # cases dask may return a scalar numpy.int/numpy.float object
+            # rather than a numpy.ndarray object.
+            # Recorded in https://github.com/dask/dask/issues/2111.
+            # NOTE: also in this case, apply the original lazy-array dtype.
+            real_out = ma.masked_array(real_out.data, mask=real_out.mask,
+                                       dtype=lazy_in.dtype)
+        results.append(real_out)
+    return results
+
+
 def as_concrete_data(data):
     """
     Return the actual content of a lazy array, as a numpy array.
@@ -120,14 +147,7 @@ def as_concrete_data(data):
 
     """
     if is_lazy_data(data):
-        # Realise dask array, ensuring the data result is always a NumPy array.
-        # In some cases dask may return a scalar numpy.int/numpy.float object
-        # rather than a numpy.ndarray object.
-        # Recorded in https://github.com/dask/dask/issues/2111.
-        dtype = data.dtype
-        data = np.asanyarray(data.compute())
-        if isinstance(data, ma.core.MaskedConstant):
-            data = ma.masked_array(data.data, dtype=dtype, mask=data.mask)
+        data, = _co_realise_lazy_arrays([data])
 
     return data
 
@@ -158,3 +178,18 @@ def multidim_lazy_stack(stack):
         result = da.stack([multidim_lazy_stack(subarray)
                            for subarray in stack])
     return result
+
+
+def co_realise_cubes(cubes):
+    """
+    Fetch real data for multiple cubes at one time.
+
+    This fetches lazy content, equivalent to accessing each cube.data.
+    However, lazy calculations and data fetches can be shared between the
+    computations, improving performance.
+
+    """
+    results = _co_realise_lazy_arrays([cube.core_data() for cube in cubes])
+    for cube, result in zip(cubes, results):
+        cube.data = result
+    return cubes
diff --git a/lib/iris/tests/unit/lazy_data/test_co_realise_cubes.py b/lib/iris/tests/unit/lazy_data/test_co_realise_cubes.py
@@ -0,0 +1,87 @@
+# (C) British Crown Copyright 2018, Met Office
+#
+# This file is part of Iris.
+#
+# Iris is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Lesser General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Iris is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with Iris.  If not, see <http://www.gnu.org/licenses/>.
+"""Test function :func:`iris._lazy data.co_realise_cubes`."""
+
+from __future__ import (absolute_import, division, print_function)
+from six.moves import (filter, input, map, range, zip)  # noqa
+
+# Import iris.tests first so that some things can be initialised before
+# importing anything else.
+import iris.tests as tests
+
+from mock import MagicMock
+import numpy as np
+
+from iris.cube import Cube
+from iris._lazy_data import as_lazy_data
+
+from iris._lazy_data import co_realise_cubes
+
+
+class ArrayAccessCounter(object):
+    def __init__(self, array):
+        self.dtype = array.dtype
+        self.shape = array.shape
+        self._array = array
+        self.access_count = 0
+
+    def __getitem__(self, keys):
+        self.access_count += 1
+        return self._array[keys]
+
+
+class Test_co_realise_cubes(tests.IrisTest):
+    def test_empty(self):
+        self.assertEqual(co_realise_cubes([]), [])
+
+    def test_basic(self):
+        real_data = np.arange(3.)
+        cube = Cube(as_lazy_data(real_data))
+        self.assertTrue(cube.has_lazy_data())
+        result, = co_realise_cubes([cube])
+        self.assertEqual(result, cube)
+        self.assertFalse(cube.has_lazy_data())
+        self.assertArrayAllClose(cube.core_data(), real_data)
+
+    def test_multi(self):
+        real_data = np.arange(3.)
+        cube = Cube(as_lazy_data(real_data))
+        self.assertTrue(cube.has_lazy_data())
+        cube_2 = cube + 1
+        cube_3 = cube + 2
+        cubes = [cube, cube_2, cube_3]
+        for cube in cubes:
+            self.assertTrue(cube.has_lazy_data())
+        results = co_realise_cubes(cubes)
+        self.assertEqual(results, cubes)
+        for cube in cubes:
+            self.assertFalse(cube.has_lazy_data())
+
+    def test_combined_access(self):
+        wrapped_array = ArrayAccessCounter(np.arange(3.))
+        lazy_array = as_lazy_data(wrapped_array)
+        derived_a = lazy_array + 1
+        derived_b = lazy_array + 2
+        cube_a = Cube(derived_a)
+        cube_b = Cube(derived_b)
+        co_realise_cubes([cube_a, cube_b])
+        # Though used twice, the source data should only get fetched once.
+        self.assertEqual(wrapped_array.access_count, 1)
+
+
+if __name__ == '__main__':
+    tests.main()