From 4805e3bc073beca718bdcc51844c4eaa63e82d46 Mon Sep 17 00:00:00 2001 From: lbdreyer Date: Tue, 21 Mar 2017 11:27:44 +0000 Subject: [PATCH] Replace dask 'compute()' usage with a common realisation call. (#2) (#2447) Replace dask 'compute()' usage with a common realisation call. --- lib/iris/_lazy_data.py | 38 +++++++- lib/iris/_merge.py | 10 +- lib/iris/coords.py | 20 +++- lib/iris/cube.py | 16 ++-- lib/iris/fileformats/pp.py | 10 +- lib/iris/tests/unit/analysis/test_MEAN.py | 13 ++- lib/iris/tests/unit/analysis/test_STD_DEV.py | 11 ++- lib/iris/tests/unit/analysis/test_VARIANCE.py | 9 +- .../unit/lazy_data/test_as_concrete_data.py | 94 +++++++++++++++++++ .../tests/unit/lazy_data/test_as_lazy_data.py | 2 +- .../unit/lazy_data/test_convert_nans_array.py | 1 - .../tests/unit/lazy_data/test_is_lazy_data.py | 2 +- .../lazy_data/test_multidim_lazy_stack.py | 13 +-- 13 files changed, 190 insertions(+), 49 deletions(-) create mode 100644 lib/iris/tests/unit/lazy_data/test_as_concrete_data.py diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 1b333c7143..cf2531a6d3 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -47,12 +47,12 @@ def is_lazy_data(data): def as_lazy_data(data, chunks=_MAX_CHUNK_SIZE): """ - Convert the input array `data` to a lazy dask array. + Convert the input array `data` to a dask array. Args: * data: - An array. This will be converted to a lazy dask array. + An array. This will be converted to a dask array. Kwargs: @@ -63,7 +63,7 @@ def as_lazy_data(data, chunks=_MAX_CHUNK_SIZE): http://dask.pydata.org/en/latest/array-creation.html#chunks. Returns: - The input array converted to a lazy dask array. + The input array converted to a dask array. """ if not is_lazy_data(data): @@ -73,6 +73,38 @@ def as_lazy_data(data, chunks=_MAX_CHUNK_SIZE): return data +def as_concrete_data(data, **kwargs): + """ + Return the actual content of a lazy array, as a numpy array. + If the input data is a NumPy `ndarray` or masked array, return it + unchanged. + + If the input data is lazy, return the realised result. + + Where lazy data contains NaNs these are translated by filling or converting + to masked data, using the :func:`~iris._lazy_data.convert_nans_array` + function. + + Args: + + * data: + A dask array, NumPy `ndarray` or masked array + + Kwargs are passed through to :func:`~iris._lazy_data.convert_nans_array`. + + Returns: + A NumPy `ndarray` or masked array. + + """ + if is_lazy_data(data): + # Realise dask array. + data = data.compute() + # Convert any missing data as requested. + data = convert_nans_array(data, **kwargs) + + return data + + def array_masked_to_nans(array): """ Convert a masked array to a NumPy `ndarray` filled with NaN values. Input diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py index 972e962d81..ba5ede570d 100644 --- a/lib/iris/_merge.py +++ b/lib/iris/_merge.py @@ -33,7 +33,8 @@ import numpy as np import numpy.ma as ma -from iris._lazy_data import as_lazy_data, is_lazy_data, multidim_lazy_stack +from iris._lazy_data import (as_lazy_data, as_concrete_data, is_lazy_data, + multidim_lazy_stack) import iris.cube import iris.coords import iris.exceptions @@ -1217,10 +1218,11 @@ def merge(self, unique=True): if all_have_data: # All inputs were concrete, so turn the result back into a # normal array. - merged_data = merged_data.compute() - # Unmask the array only if it is filled. + merged_data = as_concrete_data(merged_data, + nans_replacement=ma.masked) + # Unmask the array if it has no masked points. if (ma.isMaskedArray(merged_data) and - ma.count_masked(merged_data) == 0): + not ma.is_masked(merged_data)): merged_data = merged_data.data merged_cube = self._get_cube(merged_data) merged_cubes.append(merged_cube) diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 10903a5a5f..47fb832255 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -32,11 +32,11 @@ import warnings import zlib -from iris._lazy_data import is_lazy_data import dask.array as da import netcdftime import numpy as np +from iris._lazy_data import as_concrete_data, is_lazy_data import iris.aux_factory import iris.exceptions import iris.time @@ -1611,7 +1611,11 @@ def _sanitise_array(self, src, ndmin): def points(self): """Property containing the points values as a numpy array""" if is_lazy_data(self._points): - self._points = self._points.compute() + self._points = as_concrete_data(self._points, + nans_replacement=np.ma.masked) + # NOTE: we probably don't have full support for masked aux-coords. + # We certainly *don't* handle a _FillValue attribute (and possibly + # the loader will throw one away ?) return self._points.view() @points.setter @@ -1649,7 +1653,11 @@ def bounds(self): if self._bounds is not None: bounds = self._bounds if is_lazy_data(bounds): - bounds = bounds.compute() + bounds = as_concrete_data(bounds, + nans_replacement=np.ma.masked) + # NOTE: we probably don't fully support for masked aux-coords. + # We certainly *don't* handle a _FillValue attribute (and + # possibly the loader will throw one away ?) self._bounds = bounds bounds = bounds.view() else: @@ -1740,9 +1748,11 @@ def measure(self): @property def data(self): """Property containing the data values as a numpy array""" - data = self._data if is_lazy_data(self._data): - self._data = self._data.compute() + self._data = as_concrete_data(self._data, + nans_replacement=np.ma.masked) + # NOTE: like AuxCoords, we probably don't fully support masks, and + # we certainly don't handle any _FillValue attribute. return self._data.view() @data.setter diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 72b4476f12..222a76685e 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -42,8 +42,8 @@ import iris._concatenate import iris._constraints from iris._deprecation import warn_deprecated -from iris._lazy_data import (array_masked_to_nans, as_lazy_data, - convert_nans_array, is_lazy_data) +from iris._lazy_data import as_concrete_data, as_lazy_data, is_lazy_data + import iris._merge import iris.analysis from iris.analysis.cartography import wrap_lons @@ -1733,13 +1733,11 @@ def data(self): """ if self.has_lazy_data(): try: - data = self._dask_array.compute() - # Now convert the data payload from a NaN array to a - # masked array, and if appropriate cast to the specified - # cube result dtype. - result = convert_nans_array(data, - nans_replacement=ma.masked, - result_dtype=self.dtype) + # Realise the data, convert from a NaN array to a masked array, + # and if appropriate cast to the specified cube result dtype. + result = as_concrete_data(self._dask_array, + nans_replacement=ma.masked, + result_dtype=self.dtype) self._numpy_array = result self.dtype = None diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index c2fd82de43..e145d80838 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -39,11 +39,13 @@ import netcdftime from iris._deprecation import warn_deprecated +from iris._lazy_data import (array_masked_to_nans, as_concrete_data, + as_lazy_data, is_lazy_data) import iris.config import iris.fileformats.rules import iris.fileformats.pp_rules import iris.coord_systems -from iris._lazy_data import array_masked_to_nans, as_lazy_data, is_lazy_data + try: import mo_pack @@ -1280,11 +1282,11 @@ def data(self): """ # Cache the real data on first use - if is_lazy_data(self._data): - self._data = self._data.compute() if self._data.dtype.kind == 'i' and self.bmdi == -1e30: self.bmdi = -9999 - self._data[np.isnan(self._data)] = self.bmdi + if is_lazy_data(self._data): + self._data = as_concrete_data(self._data, + nans_replacement=self.bmdi) return self._data @data.setter diff --git a/lib/iris/tests/unit/analysis/test_MEAN.py b/lib/iris/tests/unit/analysis/test_MEAN.py index 93baafa21f..6460137419 100644 --- a/lib/iris/tests/unit/analysis/test_MEAN.py +++ b/lib/iris/tests/unit/analysis/test_MEAN.py @@ -28,6 +28,7 @@ import numpy.ma as ma from iris.analysis import MEAN +from iris._lazy_data import as_concrete_data class Test_lazy_aggregate(tests.IrisTest): @@ -42,15 +43,13 @@ def setUp(self): def test_mdtol_default(self): agg = MEAN.lazy_aggregate(self.array, axis=self.axis) - result = agg.compute() - masked_result = ma.masked_array(result, mask=np.isnan(result)) + masked_result = as_concrete_data(agg, nans_replacement=ma.masked) self.assertMaskedArrayAlmostEqual(masked_result, self.expected_masked) def test_mdtol_below(self): agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.3) - result = agg.compute() - masked_result = ma.masked_array(result, mask=np.isnan(result)) + masked_result = as_concrete_data(agg, nans_replacement=ma.masked) expected_masked = self.expected_masked expected_masked.mask = [False, True, True, True] self.assertMaskedArrayAlmostEqual(masked_result, @@ -58,8 +57,7 @@ def test_mdtol_below(self): def test_mdtol_above(self): agg = MEAN.lazy_aggregate(self.array, axis=self.axis, mdtol=0.4) - result = agg.compute() - masked_result = ma.masked_array(result, mask=np.isnan(result)) + masked_result = as_concrete_data(agg, nans_replacement=ma.masked) self.assertMaskedArrayAlmostEqual(masked_result, self.expected_masked) @@ -68,8 +66,9 @@ def test_multi_axis(self): collapse_axes = (0, 2) lazy_data = as_lazy_data(data) agg = MEAN.lazy_aggregate(lazy_data, axis=collapse_axes) + result = as_concrete_data(agg, nans_replacement=ma.masked) expected = np.mean(data, axis=collapse_axes) - self.assertArrayAllClose(agg.compute(), expected) + self.assertArrayAllClose(result, expected) class Test_name(tests.IrisTest): diff --git a/lib/iris/tests/unit/analysis/test_STD_DEV.py b/lib/iris/tests/unit/analysis/test_STD_DEV.py index 1cd9049b0f..3934a5a518 100644 --- a/lib/iris/tests/unit/analysis/test_STD_DEV.py +++ b/lib/iris/tests/unit/analysis/test_STD_DEV.py @@ -23,9 +23,9 @@ # importing anything else. import iris.tests as tests -from iris._lazy_data import as_lazy_data import numpy as np +from iris._lazy_data import as_concrete_data, as_lazy_data from iris.analysis import STD_DEV @@ -37,8 +37,7 @@ def test_mdtol(self): [1., 2., na, na]]) array = as_lazy_data(array) var = STD_DEV.lazy_aggregate(array, axis=1, mdtol=0.3) - result = var.compute() - masked_result = np.ma.masked_array(result, mask=np.isnan(result)) + masked_result = as_concrete_data(var, nans_replacement=np.ma.masked) masked_expected = np.ma.masked_array([0.57735, 1., 0.707107], mask=[0, 0, 1]) self.assertMaskedArrayAlmostEqual(masked_result, masked_expected) @@ -46,12 +45,14 @@ def test_mdtol(self): def test_ddof_one(self): array = as_lazy_data(np.arange(8)) var = STD_DEV.lazy_aggregate(array, axis=0, ddof=1) - self.assertArrayAlmostEqual(var.compute(), np.array(2.449489)) + result = as_concrete_data(var) + self.assertArrayAlmostEqual(result, np.array(2.449489)) def test_ddof_zero(self): array = as_lazy_data(np.arange(8)) var = STD_DEV.lazy_aggregate(array, axis=0, ddof=0) - self.assertArrayAlmostEqual(var.compute(), np.array(2.291287)) + result = as_concrete_data(var) + self.assertArrayAlmostEqual(result, np.array(2.291287)) class Test_name(tests.IrisTest): diff --git a/lib/iris/tests/unit/analysis/test_VARIANCE.py b/lib/iris/tests/unit/analysis/test_VARIANCE.py index f858cd7182..d56bf3b36a 100644 --- a/lib/iris/tests/unit/analysis/test_VARIANCE.py +++ b/lib/iris/tests/unit/analysis/test_VARIANCE.py @@ -23,13 +23,14 @@ # importing anything else. import iris.tests as tests -from iris._lazy_data import as_lazy_data import numpy as np import numpy.ma as ma +from iris._lazy_data import as_lazy_data, as_concrete_data from iris.analysis import VARIANCE import iris.cube from iris.coords import DimCoord + from iris.tests import mock @@ -71,12 +72,14 @@ class Test_lazy_aggregate(tests.IrisTest): def test_ddof_one(self): array = as_lazy_data(np.arange(8)) var = VARIANCE.lazy_aggregate(array, axis=0, ddof=1) - self.assertArrayAlmostEqual(var.compute(), np.array(6.0)) + result = as_concrete_data(var) + self.assertArrayAlmostEqual(result, np.array(6.0)) def test_ddof_zero(self): array = as_lazy_data(np.arange(8)) var = VARIANCE.lazy_aggregate(array, axis=0, ddof=0) - self.assertArrayAlmostEqual(var.compute(), np.array(5.25)) + result = as_concrete_data(var) + self.assertArrayAlmostEqual(result, np.array(5.25)) class Test_name(tests.IrisTest): diff --git a/lib/iris/tests/unit/lazy_data/test_as_concrete_data.py b/lib/iris/tests/unit/lazy_data/test_as_concrete_data.py new file mode 100644 index 0000000000..1c119f8b39 --- /dev/null +++ b/lib/iris/tests/unit/lazy_data/test_as_concrete_data.py @@ -0,0 +1,94 @@ +# (C) British Crown Copyright 2017, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +"""Test function :func:`iris._lazy data.as_concrete_data`.""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import dask.array as da +import numpy as np +import numpy.ma as ma + +from iris._lazy_data import as_concrete_data, as_lazy_data, is_lazy_data +from iris.tests import mock + + +class Test_as_concrete_data(tests.IrisTest): + def test_concrete_input_data(self): + data = np.arange(24).reshape((4, 6)) + result = as_concrete_data(data) + self.assertIs(data, result) + self.assertFalse(is_lazy_data(result)) + + def test_concrete_masked_input_data(self): + data = ma.masked_array([10, 12, 8, 2], mask=[True, True, False, True]) + result = as_concrete_data(data) + self.assertIs(data, result) + self.assertFalse(is_lazy_data(result)) + + def test_lazy_data(self): + # Minimal testing as as_concrete_data is a wrapper to + # convert_nans_array + data = np.arange(24).reshape((2, 12)) + lazy_array = as_lazy_data(data) + + sentinel = mock.sentinel.data + with mock.patch('iris._lazy_data.convert_nans_array') as conv_nans: + conv_nans.return_value = sentinel + result = as_concrete_data(lazy_array) + self.assertEqual(sentinel, result) + + # Check call to convert_nans_array + conv_nans.assert_called_once() + args, kwargs = conv_nans.call_args + arg, = args + self.assertFalse(is_lazy_data(arg)) + self.assertArrayEqual(arg, data) + self.assertEqual(kwargs, {}) + + def test_lazy_data_pass_thru_kwargs(self): + # Minimal testing as as_concrete_data is a wrapper to + # convert_nans_array + data = np.arange(24).reshape((2, 12)) + lazy_array = as_lazy_data(data) + nans_replacement = 7 + result_dtype = np.int16 + + sentinel = mock.sentinel.data + with mock.patch('iris._lazy_data.convert_nans_array') as conv_nans: + conv_nans.return_value = sentinel + result = as_concrete_data(lazy_array, + nans_replacement=nans_replacement, + result_dtype=result_dtype) + self.assertEqual(sentinel, result) + + # Check call to convert_nans_array + conv_nans.assert_called_once() + args, kwargs = conv_nans.call_args + arg, = args + self.assertFalse(is_lazy_data(arg)) + self.assertArrayEqual(arg, data) + self.assertEqual(kwargs, {'nans_replacement': nans_replacement, + 'result_dtype': result_dtype, }) + + +if __name__ == '__main__': + tests.main() diff --git a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py index 45aa0a9833..e5e235b564 100644 --- a/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_as_lazy_data.py @@ -23,8 +23,8 @@ # importing anything else. import iris.tests as tests -import numpy as np import dask.array as da +import numpy as np from iris._lazy_data import as_lazy_data, _MAX_CHUNK_SIZE diff --git a/lib/iris/tests/unit/lazy_data/test_convert_nans_array.py b/lib/iris/tests/unit/lazy_data/test_convert_nans_array.py index 23fa746935..f3abb6aaf4 100644 --- a/lib/iris/tests/unit/lazy_data/test_convert_nans_array.py +++ b/lib/iris/tests/unit/lazy_data/test_convert_nans_array.py @@ -26,7 +26,6 @@ # importing anything else. import iris.tests as tests - import numpy as np import numpy.ma as ma diff --git a/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py b/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py index 5a9595fa1f..1fdadf0aea 100644 --- a/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py +++ b/lib/iris/tests/unit/lazy_data/test_is_lazy_data.py @@ -23,8 +23,8 @@ # importing anything else. import iris.tests as tests -import numpy as np import dask.array as da +import numpy as np from iris._lazy_data import as_lazy_data, is_lazy_data, _MAX_CHUNK_SIZE diff --git a/lib/iris/tests/unit/lazy_data/test_multidim_lazy_stack.py b/lib/iris/tests/unit/lazy_data/test_multidim_lazy_stack.py index 80839b02b5..6dc409794a 100644 --- a/lib/iris/tests/unit/lazy_data/test_multidim_lazy_stack.py +++ b/lib/iris/tests/unit/lazy_data/test_multidim_lazy_stack.py @@ -23,10 +23,10 @@ # importing anything else. import iris.tests as tests -import numpy as np import dask.array as da +import numpy as np -from iris._lazy_data import as_lazy_data, multidim_lazy_stack +from iris._lazy_data import as_concrete_data, as_lazy_data, multidim_lazy_stack class Test_multidim_lazy_stack(tests.IrisTest): @@ -44,19 +44,20 @@ def _check(self, stack_shape): result = multidim_lazy_stack(stack) self.assertEqual(result.shape, stack_shape + stack_element_shape) self.assertIsInstance(result, da.core.Array) - self.assertArrayAllClose(result.compute(), expected) + result = as_concrete_data(result) + self.assertArrayAllClose(result, expected) def test_0d_lazy_stack(self): shape = () - result = self._check(shape) + self._check(shape) def test_1d_lazy_stack(self): shape = (2,) - result = self._check(shape) + self._check(shape) def test_2d_lazy_stack(self): shape = (3, 2) - result = self._check(shape) + self._check(shape) if __name__ == '__main__':