Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cube co-realisation. #2967

Merged
merged 6 commits into from
Mar 9, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
* Added new function :func:`iris.co_realise_cubes` to compute multiple lazy
values in a single operation, avoiding repeated re-loading of data or
re-calculation of expressions.
3 changes: 2 additions & 1 deletion lib/iris/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def callback(cube, field, filename):
from iris._deprecation import IrisDeprecation, warn_deprecated
import iris.fileformats
import iris.io
from iris._lazy_data import co_realise_cubes


try:
Expand All @@ -127,7 +128,7 @@ def callback(cube, field, filename):
__all__ = ['load', 'load_cube', 'load_cubes', 'load_raw',
'save', 'Constraint', 'AttributeConstraint', 'sample_data_path',
'site_configuration', 'Future', 'FUTURE',
'IrisDeprecation']
'IrisDeprecation', 'co_realise_cubes']


Constraint = iris._constraints.Constraint
Expand Down
80 changes: 71 additions & 9 deletions lib/iris/_lazy_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2017, Met Office
# (C) British Crown Copyright 2017 - 2018, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -102,6 +102,39 @@ def as_lazy_data(data, chunks=None, asarray=False):
return data


def _co_realise_lazy_arrays(arrays):
"""
Compute multiple lazy arrays and return a list of real values.

All the arrays are computed together, so they can share results for common
graph elements.

Casts all results with `np.asanyarray`, and converts any MaskedConstants
appearing into masked arrays, to ensure that all return values are
writeable NumPy array objects.

Any non-lazy arrays are passed through, as they are by `da.compute`.
They undergo the same result standardisation.

"""
computed_arrays = da.compute(*arrays)
results = []
for lazy_in, real_out in zip(arrays, computed_arrays):
# Ensure we always have arrays.
# Note : in some cases dask (and numpy) will return a scalar
# numpy.int/numpy.float object rather than an ndarray.
# Recorded in https://github.com/dask/dask/issues/2111.
real_out = np.asanyarray(real_out)
if isinstance(real_out, ma.core.MaskedConstant):
# Convert any masked constants into NumPy masked arrays.
# NOTE: in this case, also apply the original lazy-array dtype, as
# masked constants *always* have dtype float64.
real_out = ma.masked_array(real_out.data, mask=real_out.mask,
dtype=lazy_in.dtype)
results.append(real_out)
return results


def as_concrete_data(data):
"""
Return the actual content of a lazy array, as a numpy array.
Expand All @@ -120,14 +153,7 @@ def as_concrete_data(data):

"""
if is_lazy_data(data):
# Realise dask array, ensuring the data result is always a NumPy array.
# In some cases dask may return a scalar numpy.int/numpy.float object
# rather than a numpy.ndarray object.
# Recorded in https://github.com/dask/dask/issues/2111.
dtype = data.dtype
data = np.asanyarray(data.compute())
if isinstance(data, ma.core.MaskedConstant):
data = ma.masked_array(data.data, dtype=dtype, mask=data.mask)
data, = _co_realise_lazy_arrays([data])

return data

Expand Down Expand Up @@ -158,3 +184,39 @@ def multidim_lazy_stack(stack):
result = da.stack([multidim_lazy_stack(subarray)
for subarray in stack])
return result


def co_realise_cubes(*cubes):
"""
Fetch 'real' data for multiple cubes, in a shared calculation.

This computes any lazy data, equivalent to accessing each `cube.data`.
However, lazy calculations and data fetches can be shared between the
computations, improving performance.

Args:

* cubes (list of :class:`~iris.cube.Cube`):
Arguments, each of which is a cube to be realised.

For example::

# Form stats.
a_std = cube_a.collapsed(['x', 'y'], iris.analysis.STD_DEV)
b_std = cube_b.collapsed(['x', 'y'], iris.analysis.STD_DEV)
ab_mean_diff = (cube_b - cube_a).collapsed(['x', 'y'],
iris.analysis.MEAN)
std_err = (a_std * a_std + b_std * b_std) ** 0.5

# Compute stats together (to avoid multiple data passes).
iris.co_realise_cubes(a_std, b_std, ab_mean_diff, std_err)


.. Note::

Cubes with non-lazy data may also be passed, with no ill effect.

"""
results = _co_realise_lazy_arrays([cube.core_data() for cube in cubes])
for cube, result in zip(cubes, results):
cube.data = result
86 changes: 86 additions & 0 deletions lib/iris/tests/unit/lazy_data/test_co_realise_cubes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# (C) British Crown Copyright 2018, Met Office
#
# This file is part of Iris.
#
# Iris is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the
# Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Iris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
"""Test function :func:`iris._lazy data.co_realise_cubes`."""

from __future__ import (absolute_import, division, print_function)
from six.moves import (filter, input, map, range, zip) # noqa

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests

from mock import MagicMock
import numpy as np

from iris.cube import Cube
from iris._lazy_data import as_lazy_data

from iris._lazy_data import co_realise_cubes
Copy link
Contributor

@djkirkham djkirkham Mar 8, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could combine this line with line 30. Actually I guess you're following the convention that the method being tested is imported on its own line?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍



class ArrayAccessCounter(object):
def __init__(self, array):
self.dtype = array.dtype
self.shape = array.shape
self._array = array
self.access_count = 0

def __getitem__(self, keys):
self.access_count += 1
return self._array[keys]


class Test_co_realise_cubes(tests.IrisTest):
def test_empty(self):
# Ensure that 'no args' case does not raise an error.
co_realise_cubes()

def test_basic(self):
real_data = np.arange(3.)
cube = Cube(as_lazy_data(real_data))
co_realise_cubes(cube)
self.assertFalse(cube.has_lazy_data())
self.assertArrayAllClose(cube.core_data(), real_data)

def test_multi(self):
real_data = np.arange(3.)
cube_base = Cube(as_lazy_data(real_data))
cube_inner = cube_base + 1
result_a = cube_base + 1
result_b = cube_inner + 1
co_realise_cubes(result_a, result_b)
# Check that target cubes were realised.
self.assertFalse(result_a.has_lazy_data())
self.assertFalse(result_b.has_lazy_data())
# Check that other cubes referenced remain lazy.
self.assertTrue(cube_base.has_lazy_data())
self.assertTrue(cube_inner.has_lazy_data())

def test_combined_access(self):
wrapped_array = ArrayAccessCounter(np.arange(3.))
lazy_array = as_lazy_data(wrapped_array)
derived_a = lazy_array + 1
derived_b = lazy_array + 2
cube_a = Cube(derived_a)
cube_b = Cube(derived_b)
co_realise_cubes(cube_a, cube_b)
# Though used twice, the source data should only get fetched once.
self.assertEqual(wrapped_array.access_count, 1)


if __name__ == '__main__':
tests.main()