diff --git a/docs/iris/src/whatsnew/contributions_3.0.0/newfeature_2019-Oct-14_cf_ancillary_data.txt b/docs/iris/src/whatsnew/contributions_3.0.0/newfeature_2019-Oct-14_cf_ancillary_data.txt new file mode 100644 index 0000000000..ea70702f38 --- /dev/null +++ b/docs/iris/src/whatsnew/contributions_3.0.0/newfeature_2019-Oct-14_cf_ancillary_data.txt @@ -0,0 +1 @@ +* CF Ancillary Variables are now supported in cubes. diff --git a/lib/iris/_concatenate.py b/lib/iris/_concatenate.py index 9f37fd24b2..8884f5306d 100644 --- a/lib/iris/_concatenate.py +++ b/lib/iris/_concatenate.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2017, Met Office +# (C) British Crown Copyright 2013 - 2019, Met Office # # This file is part of Iris. # diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py index a1eafdbc3a..6423cc95ab 100644 --- a/lib/iris/_merge.py +++ b/lib/iris/_merge.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2017, Met Office +# (C) British Crown Copyright 2010 - 2019, Met Office # # This file is part of Iris. # diff --git a/lib/iris/coords.py b/lib/iris/coords.py index 27c793241a..b0672141fb 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -15,7 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see . """ -Definitions of coordinates. +Definitions of coordinates and other dimensional metadata. """ @@ -23,7 +23,7 @@ from six.moves import (filter, input, map, range, zip) # noqa import six -from abc import ABCMeta, abstractproperty +from abc import ABCMeta from collections import namedtuple try: # Python 3 from collections.abc import Iterator @@ -52,6 +52,750 @@ from iris.util import points_step +class _DimensionalMetadata(six.with_metaclass(ABCMeta, CFVariableMixin)): + """ + Superclass for dimensional metadata. 
+ + """ + + _MODE_ADD = 1 + _MODE_SUB = 2 + _MODE_MUL = 3 + _MODE_DIV = 4 + _MODE_RDIV = 5 + _MODE_SYMBOL = {_MODE_ADD: '+', _MODE_SUB: '-', + _MODE_MUL: '*', _MODE_DIV: '/', + _MODE_RDIV: '/'} + + def __init__(self, values, standard_name=None, long_name=None, + var_name=None, units='no-unit', attributes=None): + """ + Constructs a single dimensional metadata object. + + Args: + + * values: + The values of the dimensional metadata. + + Kwargs: + + * standard_name: + CF standard name of the dimensional metadata. + * long_name: + Descriptive name of the dimensional metadata. + * var_name: + The netCDF variable name for the dimensional metadata. + * units + The :class:`~cf_units.Unit` of the dimensional metadata's values. + Can be a string, which will be converted to a Unit object. + * attributes + A dictionary containing other cf and user-defined attributes. + + """ + # Note: this class includes bounds handling code for convenience, but + # this can only run within instances which are also Coords, because + # only they may actually have bounds. This parent class has no + # bounds-related getter/setter properties, and no bounds keywords in + # its __init__ or __copy__ methods. The only bounds-related behaviour + # it provides is a 'has_bounds()' method, which always returns False. + + #: CF standard name of the quantity that the metadata represents. + self.standard_name = standard_name + + #: Descriptive name of the metadata. + self.long_name = long_name + + #: The netCDF variable name for the metadata. + self.var_name = var_name + + #: Unit of the quantity that the metadata represents. + self.units = units + + #: Other attributes, including user specified attributes that + #: have no meaning to Iris. + self.attributes = attributes + + # Set up DataManager attributes and values. + self._values_dm = None + self._values = values + self._bounds_dm = None # Only ever set on Coord-derived instances. 
+ + def __getitem__(self, keys): + """ + Returns a new dimensional metadata whose values are obtained by + conventional array indexing. + + .. note:: + + Indexing of a circular coordinate results in a non-circular + coordinate if the overall shape of the coordinate changes after + indexing. + + """ + # Note: this method includes bounds handling code, but it only runs + # within Coord type instances, as only these allow bounds to be set. + + # Fetch the values. + values = self._values_dm.core_data() + + # Index values with the keys. + _, values = iris.util._slice_data_with_keys( + values, keys) + + # Copy values after indexing to avoid making metadata that is a + # view on another metadata. This will not realise lazy data. + values = values.copy() + + # If the metadata is a coordinate and it has bounds, repeat the above + # with the bounds. + copy_args = {} + if self.has_bounds(): + bounds = self._bounds_dm.core_data() + _, bounds = iris.util._slice_data_with_keys(bounds, keys) + # Pass into the copy method : for Coords, it has a 'bounds' key. + copy_args['bounds'] = bounds.copy() + + # The new metadata is a copy of the old one with replaced content. + new_metadata = self.copy(values, **copy_args) + + return new_metadata + + def copy(self, values=None): + """ + Returns a copy of this dimensional metadata object. + + Kwargs: + + * values + An array of values for the new dimensional metadata object. + This may be a different shape to the orginal values array being + copied. + + """ + # Note: this is overridden in Coord subclasses, to add bounds handling + # and a 'bounds' keyword. + new_metadata = copy.deepcopy(self) + if values is not None: + new_metadata._values_dm = None + new_metadata._values = values + + return new_metadata + + def _sanitise_array(self, src, ndmin): + if _lazy.is_lazy_data(src): + # Lazy data : just ensure ndmin requirement. 
+ ndims_missing = ndmin - src.ndim + if ndims_missing <= 0: + result = src + else: + extended_shape = tuple([1] * ndims_missing + list(src.shape)) + result = src.reshape(extended_shape) + else: + # Real data : a few more things to do in this case. + # Ensure the array is writeable. + # NB. Returns the *same object* if src is already writeable. + result = np.require(src, requirements='W') + # Ensure the array has enough dimensions. + # NB. Returns the *same object* if result.ndim >= ndmin + func = ma.array if ma.isMaskedArray(result) else np.array + result = func(result, ndmin=ndmin, copy=False) + # We don't need to copy the data, but we do need to have our + # own view so we can control the shape, etc. + result = result.view() + return result + + @property + def _values(self): + """The _DimensionalMetadata values as a NumPy array.""" + return self._values_dm.data.view() + + @_values.setter + def _values(self, values): + # Set the values to a new array - as long as it's the same shape. + + # Ensure values has an ndmin of 1 and is either a numpy or lazy array. + # This will avoid Scalar _DimensionalMetadata with values of shape () + # rather than the desired (1,). + values = self._sanitise_array(values, 1) + + # Set or update DataManager. + if self._values_dm is None: + self._values_dm = DataManager(values) + else: + self._values_dm.data = values + + def _lazy_values(self): + """ + Returns a lazy array representing the dimensional metadata values. + + """ + return self._values_dm.lazy_data() + + def _core_values(self): + """ + The values array of this dimensional metadata which may be a NumPy + array or a dask array. + + """ + result = self._values_dm.core_data() + if not _lazy.is_lazy_data(result): + result = result.view() + + return result + + def _has_lazy_values(self): + """ + Returns a boolean indicating whether the metadata's values array is a + lazy dask array or not. 
+ + """ + return self._values_dm.has_lazy_data() + + def _repr_other_metadata(self): + fmt = '' + if self.long_name: + fmt = ', long_name={self.long_name!r}' + if self.var_name: + fmt += ', var_name={self.var_name!r}' + if len(self.attributes) > 0: + fmt += ', attributes={self.attributes}' + result = fmt.format(self=self) + return result + + def _str_dates(self, dates_as_numbers): + date_obj_array = self.units.num2date(dates_as_numbers) + kwargs = {'separator': ', ', 'prefix': ' '} + return np.core.arrayprint.array2string(date_obj_array, + formatter={'all': str}, + **kwargs) + + def __str__(self): + # Note: this method includes bounds handling code, but it only runs + # within Coord type instances, as only these allow bounds to be set. + if self.units.is_time_reference(): + fmt = '{cls}({values}{bounds}' \ + ', standard_name={self.standard_name!r}' \ + ', calendar={self.units.calendar!r}{other_metadata})' + if self.units.is_long_time_interval(): + # A time unit with a long time interval ("months" or "years") + # cannot be converted to a date using `num2date` so gracefully + # fall back to printing points as numbers, not datetimes. + values = self._values + else: + values = self._str_dates(self._values) + bounds = '' + if self.has_bounds(): + if self.units.is_long_time_interval(): + bounds_vals = self.bounds + else: + bounds_vals = self._str_dates(self.bounds) + bounds = ', bounds={vals}'.format(vals=bounds_vals) + result = fmt.format(self=self, cls=type(self).__name__, + values=values, bounds=bounds, + other_metadata=self._repr_other_metadata()) + else: + result = repr(self) + + return result + + def __repr__(self): + # Note: this method includes bounds handling code, but it only runs + # within Coord type instances, as only these allow bounds to be set. 
+ fmt = '{cls}({self._values!r}{bounds}' \ + ', standard_name={self.standard_name!r}, units={self.units!r}' \ + '{other_metadata})' + bounds = '' + # if coordinate, handle the bounds + if self.has_bounds(): + bounds = ', bounds=' + repr(self.bounds) + result = fmt.format(self=self, cls=type(self).__name__, + bounds=bounds, + other_metadata=self._repr_other_metadata()) + return result + + def __eq__(self, other): + # Note: this method includes bounds handling code, but it only runs + # within Coord type instances, as only these allow bounds to be set. + + eq = NotImplemented + # If the other object has a means of getting its definition, then do + # the comparison, otherwise return a NotImplemented to let Python try + # to resolve the operator elsewhere. + if hasattr(other, '_as_defn'): + # metadata comparison + eq = self._as_defn() == other._as_defn() + # data values comparison + if eq and eq is not NotImplemented: + eq = iris.util.array_equal(self._values, other._values, + withnans=True) + + # Also consider bounds, if we have them. + # (N.B. though only Coords can ever actually *have* bounds). + if eq and eq is not NotImplemented: + if self.has_bounds() and other.has_bounds(): + eq = iris.util.array_equal(self.bounds, other.bounds, + withnans=True) + else: + eq = not self.has_bounds() and not other.has_bounds() + + return eq + + def __ne__(self, other): + result = self.__eq__(other) + if result is not NotImplemented: + result = not result + return result + + def _as_defn(self): + defn = (self.standard_name, self.long_name, self.var_name, + self.units, self.attributes) + + return defn + + # Must supply __hash__ as Python 3 does not enable it if __eq__ is defined. + # NOTE: Violates "objects which compare equal must have the same hash". + # We ought to remove this, as equality of two dimensional metadata can + # *change*, so they really should not be hashable. + # However, current code needs it, e.g. so we can put them in sets. 
+ # Fixing it will require changing those uses. See #962 and #1772. + def __hash__(self): + return hash(id(self)) + + def __binary_operator__(self, other, mode_constant): + """ + Common code which is called by add, sub, mul and div + + Mode constant is one of ADD, SUB, MUL, DIV, RDIV + + .. note:: + + The unit is *not* changed when doing scalar operations on a + metadata object. This means that a metadata object which represents + "10 meters" when multiplied by a scalar i.e. "1000" would result in + a metadata object of "10000 meters". An alternative approach could + be taken to multiply the *unit* by 1000 and the resultant metadata + object would represent "10 kilometers". + + """ + # Note: this method includes bounds handling code, but it only runs + # within Coord type instances, as only these allow bounds to be set. + + if (isinstance(other, _DimensionalMetadata) or + not isinstance(other, (int, float, np.number))): + + def typename(obj): + if isinstance(obj, Coord): + result = 'Coord' + else: + # We don't really expect this, but do something anyway. + result = self.__class__.__name__ + return result + + emsg = '{selftype} {operator} {othertype}'.format( + selftype=typename(self), + operator=self._MODE_SYMBOL[mode_constant], + othertype=typename(other)) + raise iris.exceptions.NotYetImplementedError(emsg) + + else: + # 'Other' is an array type : adjust points, and bounds if any. 
+ result = NotImplemented + + def op(values): + if mode_constant == self._MODE_ADD: + new_values = values + other + elif mode_constant == self._MODE_SUB: + new_values = values - other + elif mode_constant == self._MODE_MUL: + new_values = values * other + elif mode_constant == self._MODE_DIV: + new_values = values / other + elif mode_constant == self._MODE_RDIV: + new_values = other / values + return new_values + + new_values = op(self._values_dm.core_data()) + result = self.copy(new_values) + if self.has_bounds(): + result.bounds = op(self._bounds_dm.core_data()) + + return result + + def __add__(self, other): + return self.__binary_operator__(other, self._MODE_ADD) + + def __sub__(self, other): + return self.__binary_operator__(other, self._MODE_SUB) + + def __mul__(self, other): + return self.__binary_operator__(other, self._MODE_MUL) + + def __div__(self, other): + return self.__binary_operator__(other, self._MODE_DIV) + + def __truediv__(self, other): + return self.__binary_operator__(other, self._MODE_DIV) + + def __radd__(self, other): + return self + other + + def __rsub__(self, other): + return (-self) + other + + def __rdiv__(self, other): + return self.__binary_operator__(other, self._MODE_RDIV) + + def __rtruediv__(self, other): + return self.__binary_operator__(other, self._MODE_RDIV) + + def __rmul__(self, other): + return self * other + + def __neg__(self): + values = -self._core_values() + copy_args = {} + if self.has_bounds(): + copy_args['bounds'] = -self.core_bounds() + return self.copy(values, **copy_args) + + def convert_units(self, unit): + """Change the units, converting the values of the metadata.""" + # If the coord has units convert the values in points (and bounds if + # present). + # Note: this method includes bounds handling code, but it only runs + # within Coord type instances, as only these allow bounds to be set. + if self.units.is_unknown(): + raise iris.exceptions.UnitConversionError( + 'Cannot convert from unknown units. 
' + 'The "units" attribute may be set directly.') + + # Set up a delayed conversion for use if either values or bounds (if + # present) are lazy. + # Make fixed copies of old + new units for a delayed conversion. + old_unit = self.units + new_unit = unit + + # Define a delayed conversion operation (i.e. a callback). + def pointwise_convert(values): + return old_unit.convert(values, new_unit) + + if self._has_lazy_values(): + new_values = _lazy.lazy_elementwise(self._lazy_values(), + pointwise_convert) + else: + new_values = self.units.convert(self._values, unit) + self._values = new_values + if self.has_bounds(): + if self.has_lazy_bounds(): + new_bounds = _lazy.lazy_elementwise(self.lazy_bounds(), + pointwise_convert) + else: + new_bounds = self.units.convert(self.bounds, unit) + self.bounds = new_bounds + self.units = unit + + def is_compatible(self, other, ignore=None): + """ + Return whether the current dimensional metadata object is compatible + with another. + + """ + compatible = (self.name() == other.name() and + self.units == other.units) + + if compatible: + common_keys = set(self.attributes).intersection(other.attributes) + if ignore is not None: + if isinstance(ignore, six.string_types): + ignore = (ignore,) + common_keys = common_keys.difference(ignore) + for key in common_keys: + if np.any(self.attributes[key] != other.attributes[key]): + compatible = False + break + + return compatible + + @property + def dtype(self): + """ + The NumPy dtype of the current dimensional metadata object, as + specified by its values. + + """ + return self._values_dm.dtype + + @property + def ndim(self): + """ + Return the number of dimensions of the current dimensional metadata + object. + + """ + return self._values_dm.ndim + + def has_bounds(self): + """ + Return a boolean indicating whether the current dimensional metadata + object has a bounds array. 
+ + """ + # Allows for code to handle unbounded dimensional metadata agnostic of + # whether the metadata is a coordinate or not. + return False + + @property + def shape(self): + """The fundamental shape of the metadata, expressed as a tuple.""" + return self._values_dm.shape + + def xml_element(self, doc): + """Return a DOM element describing this metadata.""" + # Create the XML element as the camelCaseEquivalent of the + # class name. + element_name = type(self).__name__ + element_name = element_name[0].lower() + element_name[1:] + element = doc.createElement(element_name) + + element.setAttribute('id', self._xml_id()) + + if self.standard_name: + element.setAttribute('standard_name', str(self.standard_name)) + if self.long_name: + element.setAttribute('long_name', str(self.long_name)) + if self.var_name: + element.setAttribute('var_name', str(self.var_name)) + element.setAttribute('units', repr(self.units)) + if isinstance(self, Coord): + if self.climatological: + element.setAttribute('climatological', + str(self.climatological)) + + if self.attributes: + attributes_element = doc.createElement('attributes') + for name in sorted(six.iterkeys(self.attributes)): + attribute_element = doc.createElement('attribute') + attribute_element.setAttribute('name', name) + attribute_element.setAttribute('value', + str(self.attributes[name])) + attributes_element.appendChild(attribute_element) + element.appendChild(attributes_element) + + if isinstance(self, Coord): + if self.coord_system: + element.appendChild(self.coord_system.xml_element(doc)) + + # Add the values + element.setAttribute('value_type', str(self._value_type_name())) + element.setAttribute('shape', str(self.shape)) + + # The values are referred to "points" of a coordinate and "data" + # otherwise. 
+ if isinstance(self, Coord): + values_term = 'points' + else: + values_term = 'data' + if hasattr(self._values, 'to_xml_attr'): + element.setAttribute(values_term, self._values.to_xml_attr()) + else: + element.setAttribute(values_term, + iris.util.format_array(self._values)) + return element + + def _xml_id_extra(self, unique_value): + return unique_value + + def _xml_id(self): + # Returns a consistent, unique string identifier for this coordinate. + unique_value = b'' + if self.standard_name: + unique_value += self.standard_name.encode('utf-8') + unique_value += b'\0' + if self.long_name: + unique_value += self.long_name.encode('utf-8') + unique_value += b'\0' + unique_value += str(self.units).encode('utf-8') + b'\0' + for k, v in sorted(self.attributes.items()): + unique_value += (str(k) + ':' + str(v)).encode('utf-8') + b'\0' + # Extra modifications to unique_value that are specialised in child + # classes + unique_value = self._xml_id_extra(unique_value) + # Mask to ensure consistency across Python versions & platforms. + crc = zlib.crc32(unique_value) & 0xffffffff + return '%08x' % (crc, ) + + def _value_type_name(self): + """ + A simple, readable name for the data type of the dimensional metadata + values. + + """ + dtype = self._core_values().dtype + kind = dtype.kind + if kind in 'SU': + # Establish the basic type name for 'string' type data. + # N.B. this means "unicode" in Python3, and "str" in Python2. + value_type_name = 'string' + + # Override this if not the 'native' string type. + if six.PY3: + if kind == 'S': + value_type_name = 'bytes' + else: + if kind == 'U': + value_type_name = 'unicode' + else: + value_type_name = dtype.name + + return value_type_name + + +class AncillaryVariable(_DimensionalMetadata): + def __init__(self, data, standard_name=None, long_name=None, + var_name=None, units='no-unit', attributes=None): + """ + Constructs a single ancillary variable. + + Args: + + * values: + The values of the ancillary variable. 
+ + Kwargs: + + * standard_name: + CF standard name of the ancillary variable. + * long_name: + Descriptive name of the ancillary variable. + * var_name: + The netCDF variable name for the ancillary variable. + * units + The :class:`~cf_units.Unit` of the ancillary variable's values. + Can be a string, which will be converted to a Unit object. + * attributes + A dictionary containing other cf and user-defined attributes. + + """ + super().__init__(values=data, standard_name=standard_name, + long_name=long_name, var_name=var_name, + units=units, attributes=attributes) + + @property + def data(self): + return self._values + + @data.setter + def data(self, data): + self._values = data + + def lazy_data(self): + """ + Return a lazy array representing the ancillary variable's data. + + Accessing this method will never cause the data values to be loaded. + Similarly, calling methods on, or indexing, the returned Array + will not cause the ancillary variable to have loaded data. + + If the data have already been loaded for the ancillary variable, the + returned Array will be a new lazy array wrapper. + + Returns: + A lazy array, representing the ancillary variable data array. + + """ + return super()._lazy_values() + + def core_data(self): + """ + The data array at the core of this ancillary variable, which may be a + NumPy array or a dask array. + + """ + return super()._core_values() + + def has_lazy_data(self): + """ + Return a boolean indicating whether the ancillary variable's data array + is a lazy dask array or not. + + """ + return super()._has_lazy_values() + + +class CellMeasure(AncillaryVariable): + """ + A CF Cell Measure, providing area or volume properties of a cell + where these cannot be inferred from the Coordinates and + Coordinate Reference System. + + """ + def __init__(self, data, standard_name=None, long_name=None, + var_name=None, units='1', attributes=None, measure=None): + + """ + Constructs a single cell measure. 
+ + Args: + + * data: + The values of the measure for each cell. + Either a 'real' array (:class:`numpy.ndarray`) or a 'lazy' array + (:class:`dask.array.Array`). + + Kwargs: + + * standard_name: + CF standard name of the coordinate. + * long_name: + Descriptive name of the coordinate. + * var_name: + The netCDF variable name for the coordinate. + * units + The :class:`~cf_units.Unit` of the coordinate's values. + Can be a string, which will be converted to a Unit object. + * attributes + A dictionary containing other CF and user-defined attributes. + * measure + A string describing the type of measure. 'area' and 'volume' + are the only valid entries. + + """ + super().__init__(data=data, standard_name=standard_name, + long_name=long_name, var_name=var_name, + units=units, attributes=attributes) + + #: String naming the measure type. + self.measure = measure + + @property + def measure(self): + return self._measure + + @measure.setter + def measure(self, measure): + if measure not in ['area', 'volume']: + raise ValueError("measure must be 'area' or 'volume', " + "not {}".format(measure)) + self._measure = measure + + def __str__(self): + result = repr(self) + return result + + def __repr__(self): + fmt = ('{cls}({self.data!r}' + ', measure={self.measure}, standard_name={self.standard_name!r}' + ', units={self.units!r}{other_metadata})') + result = fmt.format(self=self, cls=type(self).__name__, + other_metadata=self._repr_other_metadata()) + return result + + def _as_defn(self): + defn = (self.standard_name, self.long_name, self.var_name, + self.units, self.attributes, self.measure) + return defn + + class CoordDefn(namedtuple('CoordDefn', ['standard_name', 'long_name', 'var_name', 'units', @@ -433,21 +1177,11 @@ def contains_point(self, point): return np.min(self.bound) <= point <= np.max(self.bound) -class Coord(six.with_metaclass(ABCMeta, CFVariableMixin)): +class Coord(_DimensionalMetadata): """ - Abstract superclass for coordinates. 
+ Superclass for coordinates. """ - - _MODE_ADD = 1 - _MODE_SUB = 2 - _MODE_MUL = 3 - _MODE_DIV = 4 - _MODE_RDIV = 5 - _MODE_SYMBOL = {_MODE_ADD: '+', _MODE_SUB: '-', - _MODE_MUL: '*', _MODE_DIV: '/', - _MODE_RDIV: '/'} - def __init__(self, points, standard_name=None, long_name=None, var_name=None, units='1', bounds=None, attributes=None, coord_system=None, @@ -497,68 +1231,18 @@ def __init__(self, points, standard_name=None, long_name=None, from NetCDF. Always False if no bounds exist. """ - #: CF standard name of the quantity that the coordinate represents. - self.standard_name = standard_name - - #: Descriptive name of the coordinate. - self.long_name = long_name - - #: The netCDF variable name for the coordinate. - self.var_name = var_name - - #: Unit of the quantity that the coordinate represents. - self.units = units - - #: Other attributes, including user specified attributes that - #: have no meaning to Iris. - self.attributes = attributes + super().__init__(values=points, standard_name=standard_name, + long_name=long_name, var_name=var_name, + units=units, attributes=attributes) #: Relevant coordinate system (if any). self.coord_system = coord_system - # Set up DataManager attributes and points and bounds values. - self._points_dm = None + # Set up bounds DataManager attributes and the bounds values. self._bounds_dm = None - self.points = points self.bounds = bounds self.climatological = climatological - def __getitem__(self, keys): - """ - Returns a new Coord whose values are obtained by conventional array - indexing. - - .. note:: - - Indexing of a circular coordinate results in a non-circular - coordinate if the overall shape of the coordinate changes after - indexing. - - """ - # Fetch the points and bounds. - points = self._points_dm.core_data() - if self.has_bounds(): - bounds = self._bounds_dm.core_data() - else: - bounds = None - - # Index both points and bounds with the keys. 
- _, points = iris.util._slice_data_with_keys( - points, keys) - if bounds is not None: - _, bounds = iris.util._slice_data_with_keys( - bounds, keys) - - # Copy data after indexing, to avoid making coords that are - # views on other coords. This will not realise lazy data. - points = points.copy() - if bounds is not None: - bounds = bounds.copy() - - # The new coordinate is a copy of the old one with replaced content. - new_coord = self.copy(points=points, bounds=bounds) - return new_coord - def copy(self, points=None, bounds=None): """ Returns a copy of this coordinate. @@ -583,10 +1267,8 @@ def copy(self, points=None, bounds=None): raise ValueError('If bounds are specified, points must also be ' 'specified') - new_coord = copy.deepcopy(self) + new_coord = super().copy(values=points) if points is not None: - new_coord._points_dm = None - new_coord.points = points # Regardless of whether bounds are provided as an argument, new # points will result in new bounds, discarding those copied from # self. @@ -610,49 +1292,14 @@ def from_coord(cls, coord): kwargs['circular'] = getattr(coord, 'circular', False) return cls(**kwargs) - @staticmethod - def _sanitise_array(src, ndmin): - if _lazy.is_lazy_data(src): - # Lazy data : just ensure ndmin requirement. - ndims_missing = ndmin - src.ndim - if ndims_missing <= 0: - result = src - else: - extended_shape = tuple([1] * ndims_missing + list(src.shape)) - result = src.reshape(extended_shape) - else: - # Real data : a few more things to do in this case. - # Ensure the array is writeable. - # NB. Returns the *same object* if src is already writeable. - result = np.require(src, requirements='W') - # Ensure the array has enough dimensions. - # NB. Returns the *same object* if result.ndim >= ndmin - func = ma.array if ma.isMaskedArray(result) else np.array - result = func(result, ndmin=ndmin, copy=False) - # We don't need to copy the data, but we do need to have our - # own view so we can control the shape, etc. 
- result = result.view() - return result - @property def points(self): """The coordinate points values as a NumPy array.""" - return self._points_dm.data.view() + return self._values @points.setter def points(self, points): - # Set the points to a new array - as long as it's the same shape. - - # Ensure points has an ndmin of 1 and is either a numpy or lazy array. - # This will avoid Scalar coords with points of shape () rather - # than the desired (1,). - points = self._sanitise_array(points, 1) - - # Set or update DataManager. - if self._points_dm is None: - self._points_dm = DataManager(points) - else: - self._points_dm.data = points + self._values = points @property def bounds(self): @@ -731,7 +1378,7 @@ def lazy_points(self): A lazy array, representing the coord points array. """ - return self._points_dm.lazy_data() + return super()._lazy_values() def lazy_bounds(self): """ @@ -760,10 +1407,7 @@ def core_points(self): or a dask array. """ - result = self._points_dm.core_data() - if not _lazy.is_lazy_data(result): - result = result.view() - return result + return super()._core_values() def core_bounds(self): """ @@ -784,7 +1428,7 @@ def has_lazy_points(self): lazy dask array or not. 
""" - return self._points_dm.has_lazy_data() + return super()._has_lazy_values() def has_lazy_bounds(self): """ @@ -798,199 +1442,27 @@ def has_lazy_bounds(self): return result def _repr_other_metadata(self): - fmt = '' - if self.long_name: - fmt = ', long_name={self.long_name!r}' - if self.var_name: - fmt += ', var_name={self.var_name!r}' - if len(self.attributes) > 0: - fmt += ', attributes={self.attributes}' + result = super()._repr_other_metadata() if self.coord_system: - fmt += ', coord_system={self.coord_system}' + result += ', coord_system={}'.format(self.coord_system) if self.climatological: - fmt += ', climatological={' \ - 'self.climatological}' - result = fmt.format(self=self) - return result - - def _str_dates(self, dates_as_numbers): - date_obj_array = self.units.num2date(dates_as_numbers) - kwargs = {'separator': ', ', 'prefix': ' '} - return np.core.arrayprint.array2string(date_obj_array, - formatter={'all': str}, - **kwargs) - - def __str__(self): - if self.units.is_time_reference(): - fmt = '{cls}({points}{bounds}' \ - ', standard_name={self.standard_name!r}' \ - ', calendar={self.units.calendar!r}{other_metadata})' - if self.units.is_long_time_interval(): - # A time unit with a long time interval ("months" or "years") - # cannot be converted to a date using `num2date` so gracefully - # fall back to printing points as numbers, not datetimes. 
- points = self.points - else: - points = self._str_dates(self.points) - bounds = '' - if self.has_bounds(): - if self.units.is_long_time_interval(): - bounds_vals = self.bounds - else: - bounds_vals = self._str_dates(self.bounds) - bounds = ', bounds={vals}'.format(vals=bounds_vals) - result = fmt.format(self=self, cls=type(self).__name__, - points=points, bounds=bounds, - other_metadata=self._repr_other_metadata()) - else: - result = repr(self) - return result - - def __repr__(self): - fmt = '{cls}({self.points!r}{bounds}' \ - ', standard_name={self.standard_name!r}, units={self.units!r}' \ - '{other_metadata})' - bounds = '' - if self.has_bounds(): - bounds = ', bounds=' + repr(self.bounds) - result = fmt.format(self=self, cls=type(self).__name__, - bounds=bounds, - other_metadata=self._repr_other_metadata()) - return result - - def __eq__(self, other): - eq = NotImplemented - # If the other object has a means of getting its definition, and - # whether or not it has_points and has_bounds, then do the - # comparison, otherwise return a NotImplemented to let Python try to - # resolve the operator elsewhere. 
- if hasattr(other, '_as_defn'): - # metadata comparison - eq = self._as_defn() == other._as_defn() - # points comparison - if eq: - eq = iris.util.array_equal(self.points, other.points, - withnans=True) - # bounds comparison - if eq: - if self.has_bounds() and other.has_bounds(): - eq = iris.util.array_equal(self.bounds, other.bounds, - withnans=True) - else: - eq = self.bounds is None and other.bounds is None - - return eq - - def __ne__(self, other): - result = self.__eq__(other) - if result is not NotImplemented: - result = not result + result += ', climatological={}'.format(self.climatological) return result def _as_defn(self): - defn = CoordDefn(self.standard_name, self.long_name, self.var_name, - self.units, self.attributes, self.coord_system, - self.climatological) - return defn - - # Must supply __hash__ as Python 3 does not enable it if __eq__ is defined. - # NOTE: Violates "objects which compare equal must have the same hash". - # We ought to remove this, as equality of two coords can *change*, so they - # really should not be hashable. - # However, current code needs it, e.g. so we can put them in sets. - # Fixing it will require changing those uses. See #962 and #1772. - def __hash__(self): - return hash(id(self)) - - def __binary_operator__(self, other, mode_constant): - """ - Common code which is called by add, sub, mul and div - - Mode constant is one of ADD, SUB, MUL, DIV, RDIV - - .. note:: - - The unit is *not* changed when doing scalar operations on a - coordinate. This means that a coordinate which represents - "10 meters" when multiplied by a scalar i.e. "1000" would result - in a coordinate of "10000 meters". An alternative approach could - be taken to multiply the *unit* by 1000 and the resultant - coordinate would represent "10 kilometers". 
- - """ - if isinstance(other, Coord): - emsg = 'coord {} coord'.format(Coord._MODE_SYMBOL[mode_constant]) - raise iris.exceptions.NotYetImplementedError(emsg) - - elif isinstance(other, (int, float, np.number)): - points = self._points_dm.core_data() - - if mode_constant == Coord._MODE_ADD: - new_points = points + other - elif mode_constant == Coord._MODE_SUB: - new_points = points - other - elif mode_constant == Coord._MODE_MUL: - new_points = points * other - elif mode_constant == Coord._MODE_DIV: - new_points = points / other - elif mode_constant == Coord._MODE_RDIV: - new_points = other / points - - if self.has_bounds(): - bounds = self._bounds_dm.core_data() - - if mode_constant == Coord._MODE_ADD: - new_bounds = bounds + other - elif mode_constant == Coord._MODE_SUB: - new_bounds = bounds - other - elif mode_constant == Coord._MODE_MUL: - new_bounds = bounds * other - elif mode_constant == Coord._MODE_DIV: - new_bounds = bounds / other - elif mode_constant == Coord._MODE_RDIV: - new_bounds = other / bounds - - else: - new_bounds = None - new_coord = self.copy(new_points, new_bounds) - return new_coord - - else: - return NotImplemented - - def __add__(self, other): - return self.__binary_operator__(other, Coord._MODE_ADD) - - def __sub__(self, other): - return self.__binary_operator__(other, Coord._MODE_SUB) - - def __mul__(self, other): - return self.__binary_operator__(other, Coord._MODE_MUL) - - def __div__(self, other): - return self.__binary_operator__(other, Coord._MODE_DIV) - - def __truediv__(self, other): - return self.__binary_operator__(other, Coord._MODE_DIV) - - def __radd__(self, other): - return self + other - - def __rsub__(self, other): - return (-self) + other - - def __rdiv__(self, other): - return self.__binary_operator__(other, Coord._MODE_RDIV) - - def __rtruediv__(self, other): - return self.__binary_operator__(other, Coord._MODE_RDIV) - - def __rmul__(self, other): - return self * other + defn = CoordDefn(self.standard_name, 
self.long_name, self.var_name, + self.units, self.attributes, self.coord_system, + self.climatological) + return defn - def __neg__(self): - return self.copy(-self.core_points(), - -self.core_bounds() if self.has_bounds() else None) + # Must supply __hash__ as Python 3 does not enable it if __eq__ is defined. + # NOTE: Violates "objects which compare equal must have the same hash". + # We ought to remove this, as equality of two coords can *change*, so they + # really should not be hashable. + # However, current code needs it, e.g. so we can put them in sets. + # Fixing it will require changing those uses. See #962 and #1772. + def __hash__(self): + return hash(id(self)) def convert_units(self, unit): """ @@ -1008,35 +1480,7 @@ def convert_units(self, unit): :attr:`~iris.coords.Coord.bounds` by 180.0/:math:`\pi`. """ - # If the coord has units convert the values in points (and bounds if - # present). - if self.units.is_unknown(): - raise iris.exceptions.UnitConversionError( - 'Cannot convert from unknown units. ' - 'The "coord.units" attribute may be set directly.') - if self.has_lazy_points() or self.has_lazy_bounds(): - # Make fixed copies of old + new units for a delayed conversion. - old_unit = self.units - new_unit = unit - - # Define a delayed conversion operation (i.e. a callback). - def pointwise_convert(values): - return old_unit.convert(values, new_unit) - - if self.has_lazy_points(): - new_points = _lazy.lazy_elementwise(self.lazy_points(), - pointwise_convert) - else: - new_points = self.units.convert(self.points, unit) - self.points = new_points - if self.has_bounds(): - if self.has_lazy_bounds(): - new_bounds = _lazy.lazy_elementwise(self.lazy_bounds(), - pointwise_convert) - else: - new_bounds = self.units.convert(self.bounds, unit) - self.bounds = new_bounds - self.units = unit + super().convert_units(unit=unit) def cells(self): """ @@ -1251,31 +1695,12 @@ def is_compatible(self, other, ignore=None): Boolean. 
""" - compatible = (self.name() == other.name() and - self.units == other.units and - self.coord_system == other.coord_system) - - if compatible: - common_keys = set(self.attributes).intersection(other.attributes) - if ignore is not None: - if isinstance(ignore, six.string_types): - ignore = (ignore,) - common_keys = common_keys.difference(ignore) - for key in common_keys: - if np.any(self.attributes[key] != other.attributes[key]): - compatible = False - break + compatible = False + if (self.coord_system == other.coord_system): + compatible = super().is_compatible(other=other, ignore=ignore) return compatible - @property - def dtype(self): - """ - The NumPy dtype of the coord, as specified by its points. - - """ - return self._points_dm.dtype - @property def bounds_dtype(self): """ @@ -1288,15 +1713,6 @@ def bounds_dtype(self): result = self._bounds_dm.dtype return result - @property - def ndim(self): - """ - Return the number of dimensions of the coordinate (not including the - bounded dimension). - - """ - return self._points_dm.ndim - @property def nbounds(self): """ @@ -1312,11 +1728,6 @@ def has_bounds(self): """Return a boolean indicating whether the coord has a bounds array.""" return self._bounds_dm is not None - @property - def shape(self): - """The fundamental shape of the Coord, expressed as a tuple.""" - return self._points_dm.shape - def cell(self, index): """ Return the single :class:`Cell` instance which results from slicing the @@ -1701,45 +2112,15 @@ def nearest_neighbour_index(self, point): def xml_element(self, doc): """Return a DOM element describing this Coord.""" # Create the XML element as the camelCaseEquivalent of the - # class name. 
- element_name = type(self).__name__ - element_name = element_name[0].lower() + element_name[1:] - element = doc.createElement(element_name) - - element.setAttribute('id', self._xml_id()) - - if self.standard_name: - element.setAttribute('standard_name', str(self.standard_name)) - if self.long_name: - element.setAttribute('long_name', str(self.long_name)) - if self.var_name: - element.setAttribute('var_name', str(self.var_name)) - element.setAttribute('units', repr(self.units)) - if self.climatological: - element.setAttribute('climatological', str(self.climatological)) - - if self.attributes: - attributes_element = doc.createElement('attributes') - for name in sorted(six.iterkeys(self.attributes)): - attribute_element = doc.createElement('attribute') - attribute_element.setAttribute('name', name) - attribute_element.setAttribute('value', - str(self.attributes[name])) - attributes_element.appendChild(attribute_element) - element.appendChild(attributes_element) + # class name + element = super().xml_element(doc=doc) - # Add a coord system sub-element? - if self.coord_system: - element.appendChild(self.coord_system.xml_element(doc)) - - # Add the values - element.setAttribute('value_type', str(self._value_type_name())) - element.setAttribute('shape', str(self.shape)) if hasattr(self.points, 'to_xml_attr'): element.setAttribute('points', self.points.to_xml_attr()) else: element.setAttribute('points', iris.util.format_array(self.points)) + # Add bounds handling if self.has_bounds(): if hasattr(self.bounds, 'to_xml_attr'): element.setAttribute('bounds', self.bounds.to_xml_attr()) @@ -1749,47 +2130,10 @@ def xml_element(self, doc): return element - def _xml_id(self): - # Returns a consistent, unique string identifier for this coordinate. 
- unique_value = b'' - if self.standard_name: - unique_value += self.standard_name.encode('utf-8') - unique_value += b'\0' - if self.long_name: - unique_value += self.long_name.encode('utf-8') - unique_value += b'\0' - unique_value += str(self.units).encode('utf-8') + b'\0' - for k, v in sorted(self.attributes.items()): - unique_value += (str(k) + ':' + str(v)).encode('utf-8') + b'\0' + def _xml_id_extra(self, unique_value): + """Coord specific stuff for the xml id""" unique_value += str(self.coord_system).encode('utf-8') + b'\0' - # Mask to ensure consistency across Python versions & platforms. - crc = zlib.crc32(unique_value) & 0xffffffff - return '%08x' % (crc, ) - - def _value_type_name(self): - """ - A simple, readable name for the data type of the Coord point/bound - values. - - """ - dtype = self.core_points().dtype - kind = dtype.kind - if kind in 'SU': - # Establish the basic type name for 'string' type data. - # N.B. this means "unicode" in Python3, and "str" in Python2. - value_type_name = 'string' - - # Override this if not the 'native' string type. - if six.PY3: - if kind == 'S': - value_type_name = 'bytes' - else: - if kind == 'U': - value_type_name = 'unicode' - else: - value_type_name = dtype.name - - return value_type_name + return unique_value class DimCoord(Coord): @@ -1853,13 +2197,12 @@ def __init__(self, points, standard_name=None, long_name=None, read-only points and bounds. """ - super(DimCoord, self).__init__( - points, standard_name=standard_name, - long_name=long_name, var_name=var_name, - units=units, bounds=bounds, - attributes=attributes, - coord_system=coord_system, - climatological=climatological) + super().__init__(points, standard_name=standard_name, + long_name=long_name, var_name=var_name, + units=units, bounds=bounds, + attributes=attributes, + coord_system=coord_system, + climatological=climatological) #: Whether the coordinate wraps by ``coord.units.modulus``. 
self.circular = bool(circular) @@ -1871,17 +2214,17 @@ def __deepcopy__(self, memo): Used if copy.deepcopy is called on a coordinate. """ - new_coord = copy.deepcopy(super(DimCoord, self), memo) + new_coord = copy.deepcopy(super(), memo) # Ensure points and bounds arrays are read-only. - new_coord._points_dm.data.flags.writeable = False + new_coord._values_dm.data.flags.writeable = False if new_coord._bounds_dm is not None: new_coord._bounds_dm.data.flags.writeable = False return new_coord def copy(self, points=None, bounds=None): - new_coord = super(DimCoord, self).copy(points=points, bounds=bounds) + new_coord = super().copy(points=points, bounds=bounds) # Make the arrays read-only. - new_coord._points_dm.data.flags.writeable = False + new_coord._values_dm.data.flags.writeable = False if bounds is not None: new_coord._bounds_dm.data.flags.writeable = False return new_coord @@ -1906,7 +2249,7 @@ def __eq__(self, other): __hash__ = Coord.__hash__ def __getitem__(self, key): - coord = super(DimCoord, self).__getitem__(key) + coord = super().__getitem__(key) coord.circular = self.circular and coord.shape == self.shape return coord @@ -1951,8 +2294,8 @@ def _new_points_requirements(self, points): emsg = 'The {!r} {} points array must be strictly monotonic.' raise ValueError(emsg.format(self.name(), self.__class__.__name__)) - @Coord.points.setter - def points(self, points): + @Coord._values.setter + def _values(self, points): # DimCoord always realises the points, to allow monotonicity checks. # Ensure it is an actual array, and also make our own copy so that we # can make it read-only. @@ -1965,12 +2308,11 @@ def points(self, points): # Cast to a numpy array for masked arrays with no mask. points = np.array(points) - # Call the parent points setter. 
- super(DimCoord, self.__class__).points.fset(self, points) + super(DimCoord, self.__class__)._values.fset(self, points) - if self._points_dm is not None: + if self._values_dm is not None: # Re-fetch the core array, as the super call may replace it. - points = self._points_dm.core_data() + points = self._values_dm.core_data() # N.B. always a *real* array, as we realised 'points' at the start. # Make the array read-only. @@ -2053,7 +2395,7 @@ def is_monotonic(self): def xml_element(self, doc): """Return DOM element describing this :class:`iris.coords.DimCoord`.""" - element = super(DimCoord, self).xml_element(doc) + element = super().xml_element(doc) if self.circular: element.setAttribute('circular', str(self.circular)) return element @@ -2077,198 +2419,6 @@ class AuxCoord(Coord): # AuxCoord-specific code if needed in future. -class CellMeasure(six.with_metaclass(ABCMeta, CFVariableMixin)): - """ - A CF Cell Measure, providing area or volume properties of a cell - where these cannot be inferred from the Coordinates and - Coordinate Reference System. - - """ - - def __init__(self, data, standard_name=None, long_name=None, - var_name=None, units='1', attributes=None, measure=None): - - """ - Constructs a single cell measure. - - Args: - - * data: - The values of the measure for each cell. - Either a 'real' array (:class:`numpy.ndarray`) or a 'lazy' array - (:class:`dask.array.Array`). - - Kwargs: - - * standard_name: - CF standard name of the cell measure. - * long_name: - Descriptive name of the cell measure. - * var_name: - The netCDF variable name for the cell measure. - * units - The :class:`~cf_units.Unit` of the cell measure's values. - Can be a string, which will be converted to a Unit object. - * attributes - A dictionary containing other CF and user-defined attributes. - * measure - A string describing the type of measure. 'area' and 'volume' - are the only valid entries. - - """ - #: CF standard name of the quantity that the cell measure represents. 
- self.standard_name = standard_name - - #: Descriptive name of the cell measure. - self.long_name = long_name - - #: The netCDF variable name for the cell measure. - self.var_name = var_name - - #: Unit of the quantity that the cell measure represents. - self.units = units - - #: Other attributes, including user specified attributes that - #: have no meaning to Iris. - self.attributes = attributes - - #: String naming the measure type. - self.measure = measure - - # Initialise data via the data setter code, which applies standard - # checks and ajustments. - self.data = data - - @property - def measure(self): - return self._measure - - @property - def data(self): - """Property containing the data values as a numpy array""" - return self._data_manager.data - - @data.setter - def data(self, data): - # Set the data to a new array - as long as it's the same shape. - # If data are already defined for this CellMeasure, - if data is None: - raise ValueError('The data payload of a CellMeasure may not be ' - 'None; it must be a numpy array or equivalent.') - if data.shape == (): - # If we have a scalar value, promote the shape from () to (1,). - # NOTE: this way also *realises* it. Don't think that matters. 
- data = np.array(data, ndmin=1) - if hasattr(self, '_data_manager') and self._data_manager is not None: - # Check that setting these data wouldn't change self.shape - if data.shape != self.shape: - raise ValueError("New data shape must match existing data " - "shape.") - - self._data_manager = DataManager(data) - - @property - def shape(self): - """Returns the shape of the Cell Measure, expressed as a tuple.""" - return self._data_manager.shape - - @property - def ndim(self): - """Returns the number of dimensions of the cell measure.""" - return self._data_manager.ndim - - @measure.setter - def measure(self, measure): - if measure not in ['area', 'volume']: - raise ValueError("measure must be 'area' or 'volume', " - "not {}".format(measure)) - self._measure = measure - - def __getitem__(self, keys): - """ - Returns a new CellMeasure whose values are obtained by - conventional array indexing. - - """ - # Get the data, all or part of which will become the new data. - data = self._data_manager.core_data() - - # Index data with the keys. - # Note: does not copy data unless it has to. - _, data = iris.util._slice_data_with_keys(data, keys) - - # Always copy data, to avoid making the new measure a view onto the old - # one. - data = data.copy() - - # The result is a copy with replacement data. - return self.copy(data=data) - - def copy(self, data=None): - """ - Returns a copy of this CellMeasure. - - Kwargs: - - * data: A data array for the new cell_measure. - This may be a different shape to the data of the - cell_measure being copied. - - """ - new_cell_measure = copy.deepcopy(self) - if data is not None: - # Remove the existing data manager, to prevent the data setter - # checking against existing content. - new_cell_measure._data_manager = None - # Set new data via the data setter code, which applies standard - # checks and ajustments. 
- new_cell_measure.data = data - - return new_cell_measure - - def _repr_other_metadata(self): - fmt = '' - if self.long_name: - fmt = ', long_name={self.long_name!r}' - if self.var_name: - fmt += ', var_name={self.var_name!r}' - if len(self.attributes) > 0: - fmt += ', attributes={self.attributes}' - result = fmt.format(self=self) - return result - - def __str__(self): - result = repr(self) - return result - - def __repr__(self): - fmt = ('{cls}({self.data!r}' - ', measure={self.measure}, standard_name={self.standard_name!r}' - ', units={self.units!r}{other_metadata})') - result = fmt.format(self=self, cls=type(self).__name__, - other_metadata=self._repr_other_metadata()) - return result - - def _as_defn(self): - defn = (self.standard_name, self.long_name, self.var_name, - self.units, self.attributes, self.measure) - return defn - - def __eq__(self, other): - eq = NotImplemented - if isinstance(other, CellMeasure): - eq = self._as_defn() == other._as_defn() - if eq: - eq = (self.data == other.data).all() - return eq - - def __ne__(self, other): - result = self.__eq__(other) - if result is not NotImplemented: - result = not result - return result - - class CellMethod(iris.util._OrderedHashable): """ Represents a sub-cell pre-processing operation. diff --git a/lib/iris/cube.py b/lib/iris/cube.py index af405ebe20..686cbcbdac 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -692,7 +692,8 @@ def __init__(self, data, standard_name=None, long_name=None, var_name=None, units=None, attributes=None, cell_methods=None, dim_coords_and_dims=None, aux_coords_and_dims=None, aux_factories=None, - cell_measures_and_dims=None): + cell_measures_and_dims=None, + ancillary_variables_and_dims=None): """ Creates a cube with data and optional metadata. @@ -739,6 +740,8 @@ def __init__(self, data, standard_name=None, long_name=None, :mod:`iris.aux_factory`. * cell_measures_and_dims A list of CellMeasures with dimension mappings. 
+ * ancillary_variables_and_dims + A list of AncillaryVariables with dimension mappings. For example:: >>> from iris.coords import DimCoord @@ -787,6 +790,9 @@ def __init__(self, data, standard_name=None, long_name=None, # Cell Measures self._cell_measures_and_dims = [] + # Ancillary Variables + self._ancillary_variables_and_dims = [] + identities = set() if dim_coords_and_dims: dims = set() @@ -816,6 +822,10 @@ def __init__(self, data, standard_name=None, long_name=None, for cell_measure, dims in cell_measures_and_dims: self.add_cell_measure(cell_measure, dims) + if ancillary_variables_and_dims: + for ancillary_variable, dims in ancillary_variables_and_dims: + self.add_ancillary_variable(ancillary_variable, dims) + @property def metadata(self): """ @@ -1049,6 +1059,34 @@ def add_cell_measure(self, cell_measure, data_dims=None): self._cell_measures_and_dims.sort(key=lambda cm_dims: (cm_dims[0]._as_defn(), cm_dims[1])) + def add_ancillary_variable(self, ancillary_variable, data_dims=None): + """ + Adds a CF ancillary variable to the cube. + + Args: + + * ancillary_variable + The :class:`iris.coords.AncillaryVariable` instance to be added to + the cube + + Kwargs: + * data_dims + Integer or iterable of integers giving the data dimensions spanned + by the ancillary variable. + + Raises a ValueError if an ancillary variable with identical metadata + already exists on the cube. + """ + if self.ancillary_variables(ancillary_variable): + raise ValueError('Duplicate ancillary variables not permitted') + + data_dims = self._check_multi_dim_metadata(ancillary_variable, + data_dims) + self._ancillary_variables_and_dims.append([ancillary_variable, + data_dims]) + self._ancillary_variables_and_dims.sort( + key=lambda av_dims: (av_dims[0]._as_defn(), av_dims[1])) + def add_dim_coord(self, dim_coord, data_dim): """ Add a CF coordinate to the cube. 
@@ -1168,6 +1206,22 @@ def remove_cell_measure(self, cell_measure): dim in self._cell_measures_and_dims if cell_measure_ is not cell_measure] + def remove_ancillary_variable(self, ancillary_variable): + + """ + Removes an ancillary variable from the cube. + + Args: + + * ancillary_variable (AncillaryVariable) + The AncillaryVariable to remove from the cube. + + """ + self._ancillary_variables_and_dims = [ + [ancillary_variable_, dim] for ancillary_variable_, dim in + self._ancillary_variables_and_dims + if ancillary_variable_ is not ancillary_variable] + def replace_coord(self, new_coord): """ Replace the coordinate whose metadata matches the given coordinate. @@ -1245,6 +1299,26 @@ def cell_measure_dims(self, cell_measure): return matches[0] + def ancillary_variable_dims(self, ancillary_variable): + """ + Returns a tuple of the data dimensions relevant to the given + AncillaryVariable. + + * ancillary_variable + The AncillaryVariable to look for. + + """ + # Search for existing ancillary variable (object) on the cube, faster + # lookup than equality - makes no functional difference. + matches = [dims for av, dims in self._ancillary_variables_and_dims + if av is ancillary_variable] + + if not matches: + raise iris.exceptions.AncillaryVariableNotFoundError( + ancillary_variable.name()) + + return matches[0] + def aux_factory(self, name=None, standard_name=None, long_name=None, var_name=None): """ @@ -1623,6 +1697,84 @@ def cell_measure(self, name_or_cell_measure=None): return cell_measures[0] + def ancillary_variables(self, name_or_ancillary_variable=None): + """ + Return a list of ancillary variable in this cube fitting the given + criteria. + + Kwargs: + + * name_or_ancillary_variable + Either + + (a) a :attr:`standard_name`, :attr:`long_name`, or + :attr:`var_name`. Defaults to value of `default` + (which itself defaults to `unknown`) as defined in + :class:`iris._cube_coord_common.CFVariableMixin`. 
+ + (b) a ancillary_variable instance with metadata equal to that of + the desired ancillary_variables. + + See also + :meth:`Cube.ancillary_variable()`. + + """ + name = None + + if isinstance(name_or_ancillary_variable, six.string_types): + name = name_or_ancillary_variable + else: + ancillary_variable = name_or_ancillary_variable + ancillary_variables = [] + for av, _ in self._ancillary_variables_and_dims: + if name is not None: + if av.name() == name: + ancillary_variables.append(av) + elif ancillary_variable is not None: + if av == ancillary_variable: + ancillary_variables.append(av) + else: + ancillary_variables.append(av) + return ancillary_variables + + def ancillary_variable(self, name_or_ancillary_variable=None): + """ + Return a single ancillary_variable given the same arguments as + :meth:`Cube.ancillary_variables`. + + .. note:: + + If the arguments given do not result in precisely 1 + ancillary_variable being matched, an + :class:`iris.exceptions.AncillaryVariableNotFoundError` is raised. + + .. seealso:: + + :meth:`Cube.ancillary_variables()` + for full keyword documentation. + + """ + ancillary_variables = self.ancillary_variables( + name_or_ancillary_variable) + + if len(ancillary_variables) > 1: + msg = ('Expected to find exactly 1 ancillary_variable, but found ' + '{}. 
They were: {}.') + msg = msg.format(len(ancillary_variables), ', '.join(anc_var.name() + for anc_var in ancillary_variables)) + raise iris.exceptions.AncillaryVariableNotFoundError(msg) + elif len(ancillary_variables) == 0: + if isinstance(name_or_ancillary_variable, six.string_types): + bad_name = name_or_ancillary_variable + else: + bad_name = (name_or_ancillary_variable and + name_or_ancillary_variable.name()) or '' + msg = 'Expected to find exactly 1 {!s} ancillary_variable, but ' \ + 'found none.'.format(bad_name) + raise iris.exceptions.AncillaryVariableNotFoundError(msg) + + return ancillary_variables[0] + @property def cell_methods(self): """ @@ -1892,6 +2044,10 @@ def summary(self, shorten=False, name_padding=35): vector_cell_measures = [cm for cm in self.cell_measures() if cm.shape != (1,)] + # Ancillary Variables + vector_ancillary_variables = [av for av in + self.ancillary_variables()] + # Determine the cube coordinates that don't describe the cube and # are most likely erroneous. vector_coords = vector_dim_coords + vector_aux_coords + \ @@ -1916,7 +2072,7 @@ def summary(self, shorten=False, name_padding=35): # Generate textual summary of cube vector coordinates. # def vector_summary(vector_coords, cube_header, max_line_offset, - cell_measures=None): + cell_measures=None, ancillary_variables=None): """ Generates a list of suitably aligned strings containing coord names and dimensions indicated by one or more 'x' symbols. @@ -1929,6 +2085,8 @@ def vector_summary(vector_coords, cube_header, max_line_offset, """ if cell_measures is None: cell_measures = [] + if ancillary_variables is None: + ancillary_variables = [] vector_summary = [] vectors = [] @@ -1939,9 +2097,10 @@ def vector_summary(vector_coords, cube_header, max_line_offset, # Generate basic textual summary for each vector coordinate # - WITHOUT dimension markers. 
- for coord in vector_coords + cell_measures: + for dim_meta in ( + vector_coords + cell_measures + ancillary_variables): vector_summary.append('%*s%s' % ( - indent, ' ', iris.util.clip_string(coord.name()))) + indent, ' ', iris.util.clip_string(dim_meta.name()))) min_alignment = min(alignment) # Determine whether the cube header requires realignment @@ -1968,10 +2127,10 @@ def vector_summary(vector_coords, cube_header, max_line_offset, vector_summary[index] += line vectors = vectors + vector_coords if cell_measures: - # Generate full textual summary for each vector coordinate - # - WITH dimension markers. - for index, coord in enumerate(cell_measures): - dims = self.cell_measure_dims(coord) + # Generate full textual summary for each vector cell + # measure - WITH dimension markers. + for index, cell_measure in enumerate(cell_measures): + dims = self.cell_measure_dims(cell_measure) for dim in range(len(self.shape)): width = alignment[dim] - len(vector_summary[index]) @@ -1981,6 +2140,20 @@ def vector_summary(vector_coords, cube_header, max_line_offset, char=char) vector_summary[index] += line vectors = vectors + cell_measures + if ancillary_variables: + # Generate full textual summary for each vector ancillary + # variable - WITH dimension markers. + for index, av in enumerate(ancillary_variables): + dims = self.ancillary_variable_dims(av) + + for dim in range(len(self.shape)): + width = alignment[dim] - len(vector_summary[index]) + char = 'x' if dim in dims else '-' + line = '{pad:{width}}{char}'.format(pad=' ', + width=width, + char=char) + vector_summary[index] += line + vectors = vectors + ancillary_variables # Interleave any extra lines that are needed to distinguish # the coordinates. 
vector_summary = self._summary_extra(vectors, @@ -2023,6 +2196,16 @@ def vector_summary(vector_coords, cube_header, max_line_offset, summary += '\n Cell Measures:\n' summary += '\n'.join(cell_measure_summary) + # + # Generate summary of cube ancillary variables attribute + # + if vector_ancillary_variables: + ancillary_variable_summary, cube_header = vector_summary( + [], cube_header, max_line_offset, + ancillary_variables=vector_ancillary_variables) + summary += '\n Ancillary Variables:\n' + summary += '\n'.join(ancillary_variable_summary) + # # Generate textual summary of cube scalar coordinates. # @@ -2840,6 +3023,8 @@ def remap_cube_metadata(metadata_and_dims): self._aux_coords_and_dims)) self._cell_measures_and_dims = list(map(remap_cube_metadata, self._cell_measures_and_dims)) + self._ancillary_variables_and_dims = list(map( + remap_cube_metadata, self._ancillary_variables_and_dims)) def xml(self, checksum=False, order=True, byteorder=True): """ diff --git a/lib/iris/exceptions.py b/lib/iris/exceptions.py index dbc3f523d2..d2825099d0 100644 --- a/lib/iris/exceptions.py +++ b/lib/iris/exceptions.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2017, Met Office +# (C) British Crown Copyright 2010 - 2019, Met Office # # This file is part of Iris. # @@ -43,6 +43,11 @@ class CellMeasureNotFoundError(KeyError): pass +class AncillaryVariableNotFoundError(KeyError): + """Raised when a search yields no ancillary variables.""" + pass + + class CoordinateMultiDimError(ValueError): """Raised when a routine doesn't support multi-dimensional coordinates.""" def __init__(self, msg): diff --git a/lib/iris/tests/test_coord_api.py b/lib/iris/tests/test_coord_api.py index 4866adc915..97032f6903 100644 --- a/lib/iris/tests/test_coord_api.py +++ b/lib/iris/tests/test_coord_api.py @@ -631,7 +631,7 @@ def test_get_set_points_and_bounds(self): # set bounds from non-numpy pair. # First reset the underlying shape of the coordinate. 
- coord._points_dm = DataManager(1) + coord._values_dm = DataManager(1) coord.points = 1 coord.bounds = [123, 456] self.assertEqual(coord.shape, (1, )) @@ -639,7 +639,7 @@ def test_get_set_points_and_bounds(self): # set bounds from non-numpy pairs # First reset the underlying shape of the coord's points and bounds. - coord._points_dm = DataManager(np.arange(3)) + coord._values_dm = DataManager(np.arange(3)) coord.bounds = None coord.bounds = [[123, 456], [234, 567], [345, 678]] self.assertEqual(coord.shape, (3, )) diff --git a/lib/iris/tests/unit/coords/test_AncillaryVariable.py b/lib/iris/tests/unit/coords/test_AncillaryVariable.py new file mode 100644 index 0000000000..23b1a54b35 --- /dev/null +++ b/lib/iris/tests/unit/coords/test_AncillaryVariable.py @@ -0,0 +1,628 @@ +# (C) British Crown Copyright 2019, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +"""Unit tests for the :class:`iris.coords.AncillaryVariable` class.""" + +from __future__ import (absolute_import, division, print_function) +from six.moves import (filter, input, map, range, zip) # noqa + +# Import iris.tests first so that some things can be initialised before +# importing anything else. 
+import iris.tests as tests + +import dask.array as da +import numpy as np +import numpy.ma as ma + +from iris.tests.unit.coords import CoordTestMixin, lazyness_string + +from cf_units import Unit +from iris.coords import AncillaryVariable +from iris._lazy_data import as_lazy_data + + +def data_all_dtypes_and_lazynesses(self): + # Generate ancillary variables with real and lazy data, and a few different + # dtypes. + data_types = ['real', 'lazy'] + dtypes = [np.int16, np.int32, np.float32, np.float64] + for dtype in dtypes: + for data_type_name in data_types: + data = np.asarray(self.data_real, dtype=dtype) + if data_type_name == 'lazy': + data = da.from_array(data, data.shape) + ancill_var = AncillaryVariable(data) + result = (ancill_var, data_type_name) + yield result + + +class AncillaryVariableTestMixin(CoordTestMixin): + # Define a 2-D default array shape. + def setupTestArrays(self, shape=(2, 3), masked=False): + # Create concrete and lazy data test arrays, given a desired shape. + # If masked=True, also add masked arrays with some or no masked data. + n_vals = np.prod(shape) + # Note: the values must be integral for testing integer dtypes. 
+ values = 100.0 + 10.0 * np.arange(n_vals, dtype=float).reshape(shape) + self.data_real = values + self.data_lazy = da.from_array(values, values.shape) + + if masked: + mvalues = ma.array(values) + self.no_masked_data_real = mvalues + self.no_masked_data_lazy = da.from_array(mvalues, mvalues.shape, + asarray=False) + mvalues = ma.array(mvalues, copy=True) + mvalues[0] = ma.masked + self.masked_data_real = mvalues + self.masked_data_lazy = da.from_array(mvalues, mvalues.shape, + asarray=False) + + +class Test__init__(tests.IrisTest, AncillaryVariableTestMixin): + # Test for AncillaryVariable creation, with real / lazy data + def setUp(self): + self.setupTestArrays(masked=True) + + def test_lazyness_and_dtype_combinations(self): + for (ancill_var, data_lazyness) in \ + data_all_dtypes_and_lazynesses(self, ): + data = ancill_var.core_data() + # Check properties of data. + if data_lazyness == 'real': + # Real data. + if ancill_var.dtype == self.data_real.dtype: + self.assertArraysShareData( + data, self.data_real, + 'Data values are not the same ' + 'data as the provided array.') + self.assertIsNot( + data, self.data_real, + 'Data array is the same instance as the provided ' + 'array.') + else: + # the original data values were cast to a test dtype. + check_data = self.data_real.astype(ancill_var.dtype) + self.assertEqualRealArraysAndDtypes(data, check_data) + else: + # Lazy data : the core data may be promoted to float. + check_data = self.data_lazy.astype(data.dtype) + self.assertEqualLazyArraysAndDtypes(data, check_data) + # The realisation type should be correct, though. 
+ target_dtype = ancill_var.dtype + self.assertEqual(ancill_var.data.dtype, target_dtype) + + def test_no_masked_data_real(self): + data = self.no_masked_data_real + self.assertTrue(ma.isMaskedArray(data)) + self.assertEqual(ma.count_masked(data), 0) + ancill_var = AncillaryVariable(data) + self.assertFalse(ancill_var.has_lazy_data()) + self.assertTrue(ma.isMaskedArray(ancill_var.data)) + self.assertEqual(ma.count_masked(ancill_var.data), 0) + + def test_no_masked_data_lazy(self): + data = self.no_masked_data_lazy + computed = data.compute() + self.assertTrue(ma.isMaskedArray(computed)) + self.assertEqual(ma.count_masked(computed), 0) + ancill_var = AncillaryVariable(data) + self.assertTrue(ancill_var.has_lazy_data()) + self.assertTrue(ma.isMaskedArray(ancill_var.data)) + self.assertEqual(ma.count_masked(ancill_var.data), 0) + + def test_masked_data_real(self): + data = self.masked_data_real + self.assertTrue(ma.isMaskedArray(data)) + self.assertTrue(ma.count_masked(data)) + ancill_var = AncillaryVariable(data) + self.assertFalse(ancill_var.has_lazy_data()) + self.assertTrue(ma.isMaskedArray(ancill_var.data)) + self.assertTrue(ma.count_masked(ancill_var.data)) + + def test_masked_data_lazy(self): + data = self.masked_data_lazy + computed = data.compute() + self.assertTrue(ma.isMaskedArray(computed)) + self.assertTrue(ma.count_masked(computed)) + ancill_var = AncillaryVariable(data) + self.assertTrue(ancill_var.has_lazy_data()) + self.assertTrue(ma.isMaskedArray(ancill_var.data)) + self.assertTrue(ma.count_masked(ancill_var.data)) + + +class Test_core_data(tests.IrisTest, AncillaryVariableTestMixin): + # Test for AncillaryVariable.core_data() with various lazy/real data. 
+ def setUp(self): + self.setupTestArrays() + + def test_real_data(self): + ancill_var = AncillaryVariable(self.data_real) + result = ancill_var.core_data() + self.assertArraysShareData( + result, self.data_real, + 'core_data() do not share data with the internal array.') + + def test_lazy_data(self): + ancill_var = AncillaryVariable(self.data_lazy) + result = ancill_var.core_data() + self.assertEqualLazyArraysAndDtypes(result, self.data_lazy) + + def test_lazy_points_realise(self): + ancill_var = AncillaryVariable(self.data_lazy) + real_data = ancill_var.data + result = ancill_var.core_data() + self.assertEqualRealArraysAndDtypes(result, real_data) + + +class Test_lazy_data(tests.IrisTest, AncillaryVariableTestMixin): + def setUp(self): + self.setupTestArrays() + + def test_real_core(self): + ancill_var = AncillaryVariable(self.data_real) + result = ancill_var.lazy_data() + self.assertEqualLazyArraysAndDtypes(result, self.data_lazy) + + def test_lazy_core(self): + ancill_var = AncillaryVariable(self.data_lazy) + result = ancill_var.lazy_data() + self.assertIs(result, self.data_lazy) + + +class Test_has_lazy_data(tests.IrisTest, AncillaryVariableTestMixin): + def setUp(self): + self.setupTestArrays() + + def test_real_core(self): + ancill_var = AncillaryVariable(self.data_real) + result = ancill_var.has_lazy_data() + self.assertFalse(result) + + def test_lazy_core(self): + ancill_var = AncillaryVariable(self.data_lazy) + result = ancill_var.has_lazy_data() + self.assertTrue(result) + + def test_lazy_core_realise(self): + ancill_var = AncillaryVariable(self.data_lazy) + ancill_var.data + result = ancill_var.has_lazy_data() + self.assertFalse(result) + + +class Test__getitem__(tests.IrisTest, AncillaryVariableTestMixin): + # Test for AncillaryVariable indexing with various types of data. 
+    def setUp(self):
+        self.setupTestArrays()
+
+    def test_partial_slice_data_copy(self):
+        parent_ancill_var = AncillaryVariable([1., 2., 3.])
+        sub_ancill_var = parent_ancill_var[:1]
+        values_before_change = sub_ancill_var.data.copy()
+        parent_ancill_var.data[:] = -999.9
+        self.assertArrayEqual(sub_ancill_var.data, values_before_change)
+
+    def test_full_slice_data_copy(self):
+        parent_ancill_var = AncillaryVariable([1., 2., 3.])
+        sub_ancill_var = parent_ancill_var[:]
+        values_before_change = sub_ancill_var.data.copy()
+        parent_ancill_var.data[:] = -999.9
+        self.assertArrayEqual(sub_ancill_var.data, values_before_change)
+
+    def test_dtypes(self):
+        # Index ancillary variables with real+lazy data, and either an int or
+        # floating dtype.
+        # Check that dtypes remain the same in all cases, taking the dtypes
+        # directly from the core data as we have no masking.
+        for (main_ancill_var, data_lazyness) in \
+                data_all_dtypes_and_lazynesses(self):
+
+            sub_ancill_var = main_ancill_var[:2, 1]
+
+            ancill_var_dtype = main_ancill_var.dtype
+            msg = ('Indexing main_ancill_var of dtype {} with {} data changed '
+                   'dtype of {} to {}.')
+
+            sub_data = sub_ancill_var.core_data()
+            self.assertEqual(
+                sub_data.dtype, ancill_var_dtype,
+                msg.format(ancill_var_dtype, data_lazyness,
+                           'data', sub_data.dtype))
+
+    def test_lazyness(self):
+        # Index ancillary variables with real+lazy data, and either an int or
+        # floating dtype.
+        # Check that lazy data stays lazy and real stays real, in all cases.
+ for (main_ancill_var, data_lazyness) in \ + data_all_dtypes_and_lazynesses(self): + + sub_ancill_var = main_ancill_var[:2, 1] + + msg = ('Indexing main_ancill_var of dtype {} with {} data ' + 'changed laziness of {} from {!r} to {!r}.') + ancill_var_dtype = main_ancill_var.dtype + sub_data_lazyness = lazyness_string(sub_ancill_var.core_data()) + self.assertEqual( + sub_data_lazyness, data_lazyness, + msg.format(ancill_var_dtype, data_lazyness, + 'data', data_lazyness, sub_data_lazyness)) + + def test_real_data_copies(self): + # Index ancillary variables with real+lazy data. + # In all cases, check that any real arrays are copied by the indexing. + for (main_ancill_var, data_lazyness) in \ + data_all_dtypes_and_lazynesses(self): + + sub_ancill_var = main_ancill_var[:2, 1] + + msg = ('Indexed ancillary variable with {} data ' + 'does not have its own separate {} array.') + if data_lazyness == 'real': + main_data = main_ancill_var.core_data() + sub_data = sub_ancill_var.core_data() + sub_main_data = main_data[:2, 1] + self.assertEqualRealArraysAndDtypes(sub_data, + sub_main_data) + self.assertArraysDoNotShareData( + sub_data, sub_main_data, + msg.format(data_lazyness, 'points')) + + +class Test_copy(tests.IrisTest, AncillaryVariableTestMixin): + # Test for AncillaryVariable.copy() with various types of data. + def setUp(self): + self.setupTestArrays() + + def test_lazyness(self): + # Copy ancillary variables with real+lazy data, and either an int or + # floating dtype. + # Check that lazy data stays lazy and real stays real, in all cases. 
+ for (main_ancill_var, data_lazyness) in \ + data_all_dtypes_and_lazynesses(self): + + ancill_var_dtype = main_ancill_var.dtype + copied_ancill_var = main_ancill_var.copy() + + msg = ('Copying main_ancill_var of dtype {} with {} data ' + 'changed lazyness of {} from {!r} to {!r}.') + + copied_data_lazyness = lazyness_string( + copied_ancill_var.core_data()) + self.assertEqual(copied_data_lazyness, data_lazyness, + msg.format(ancill_var_dtype, data_lazyness, + 'points', + data_lazyness, copied_data_lazyness)) + + def test_realdata_copies(self): + # Copy ancillary variables with real+lazy data. + # In all cases, check that any real arrays are copies, not views. + for (main_ancill_var, data_lazyness) in \ + data_all_dtypes_and_lazynesses(self): + + copied_ancill_var = main_ancill_var.copy() + + msg = ('Copied ancillary variable with {} data ' + 'does not have its own separate {} array.') + + if data_lazyness == 'real': + main_data = main_ancill_var.core_data() + copied_data = copied_ancill_var.core_data() + self.assertEqualRealArraysAndDtypes(main_data, copied_data) + self.assertArraysDoNotShareData( + main_data, copied_data, + msg.format(data_lazyness, 'points')) + + +class Test_data__getter(tests.IrisTest, AncillaryVariableTestMixin): + def setUp(self): + self.setupTestArrays() + + def test_mutable_real_data(self): + # Check that ancill_var.data returns a modifiable array, and changes + # to it are reflected to the ancillary_var. + data = np.array([1.0, 2.0, 3.0, 4.0]) + ancill_var = AncillaryVariable(data) + initial_values = data.copy() + ancill_var.data[1:2] += 33.1 + result = ancill_var.data + self.assertFalse(np.all(result == initial_values)) + + def test_real_data(self): + # Getting real data does not change or copy them. 
+        ancill_var = AncillaryVariable(self.data_real)
+        result = ancill_var.data
+        self.assertArraysShareData(
+            result, self.data_real,
+            'Data values do not share data with the provided array.')
+
+    def test_lazy_data(self):
+        # Getting lazy data realises them.
+        ancill_var = AncillaryVariable(self.data_lazy)
+        self.assertTrue(ancill_var.has_lazy_data())
+        result = ancill_var.data
+        self.assertFalse(ancill_var.has_lazy_data())
+        self.assertEqualRealArraysAndDtypes(result, self.data_real)
+
+
+class Test_data__setter(tests.IrisTest, AncillaryVariableTestMixin):
+    def setUp(self):
+        self.setupTestArrays()
+
+    def test_real_set_real(self):
+        # Setting new real data does not make a copy.
+        ancill_var = AncillaryVariable(self.data_real)
+        new_data = self.data_real + 102.3
+        ancill_var.data = new_data
+        result = ancill_var.core_data()
+        self.assertArraysShareData(
+            result, new_data,
+            'Data values do not share data with the assigned array.')
+
+    def test_fail_bad_shape(self):
+        # Setting real data requires matching shape.
+        ancill_var = AncillaryVariable([1.0, 2.0])
+        msg = r'Require data with shape \(2,\), got \(3,\)'
+        with self.assertRaisesRegexp(ValueError, msg):
+            ancill_var.data = np.array([1.0, 2.0, 3.0])
+
+    def test_real_set_lazy(self):
+        # Setting new lazy data does not make a copy.
+ ancill_var = AncillaryVariable(self.data_real) + new_data = self.data_lazy + 102.3 + ancill_var.data = new_data + result = ancill_var.core_data() + self.assertEqualLazyArraysAndDtypes(result, new_data) + + +class Test__str__(tests.IrisTest): + def test_non_time_values(self): + ancillary_var = AncillaryVariable( + np.array([2, 5, 9]), + standard_name='height', + long_name='height of detector', + var_name='height', + units='m', + attributes={'notes': 'Measured from sea level'}) + expected = ("AncillaryVariable(array([2, 5, 9]), " + "standard_name='height', units=Unit('m'), " + "long_name='height of detector', var_name='height', " + "attributes={'notes': 'Measured from sea level'})") + self.assertEqual(expected, ancillary_var.__str__(), ) + + def test_time_values(self): + ancillary_var = AncillaryVariable( + np.array([2, 5, 9]), + units='hours since 1970-01-01 01:00', + long_name='time of previous valid detection') + expected = ("AncillaryVariable([1970-01-01 03:00:00, " + "1970-01-01 06:00:00, 1970-01-01 10:00:00], " + "standard_name=None, calendar='gregorian', " + "long_name='time of previous valid detection')") + self.assertEqual(expected, ancillary_var.__str__(), ) + + +class Test__repr__(tests.IrisTest): + def test_non_time_values(self): + ancillary_var = AncillaryVariable( + np.array([2, 5, 9]), + standard_name='height', + long_name='height of detector', + var_name='height', + units='m', + attributes={'notes': 'Measured from sea level'}) + expected = ("AncillaryVariable(array([2, 5, 9]), " + "standard_name='height', units=Unit('m'), " + "long_name='height of detector', var_name='height', " + "attributes={'notes': 'Measured from sea level'})") + self.assertEqual(expected, ancillary_var.__repr__(), ) + + def test_time_values(self): + ancillary_var = AncillaryVariable( + np.array([2, 5, 9]), + units='hours since 1970-01-01 01:00', + long_name='time of previous valid detection') + expected = ("AncillaryVariable(array([2, 5, 9]), standard_name=None, " + 
"units=Unit('hours since 1970-01-01 01:00', " + "calendar='gregorian'), " + "long_name='time of previous valid detection')") + self.assertEqual(expected, ancillary_var.__repr__()) + + +class Test___binary_operator__(tests.IrisTest, AncillaryVariableTestMixin): + # Test maths operations on on real+lazy data. + def setUp(self): + self.setupTestArrays() + + self.real_ancill_var = AncillaryVariable(self.data_real) + self.lazy_ancill_var = AncillaryVariable(self.data_lazy) + + self.test_combinations = [ + (self.real_ancill_var, self.data_real, 'real'), + (self.lazy_ancill_var, self.data_lazy, 'lazy')] + + def _check(self, result_ancill_var, expected_data, lazyness): + # Test each operation on + data = result_ancill_var.core_data() + if lazyness == 'real': + self.assertEqualRealArraysAndDtypes(expected_data, data) + else: + self.assertEqualLazyArraysAndDtypes(expected_data, data) + + def test_add(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = ancill_var + 10 + expected_data = orig_data + 10 + self._check(result, expected_data, data_lazyness) + + def test_add_inplace(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + ancill_var += 10 + expected_data = orig_data + 10 + self._check(ancill_var, expected_data, data_lazyness) + + def test_right_add(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = 10 + ancill_var + expected_data = 10 + orig_data + self._check(result, expected_data, data_lazyness) + + def test_subtract(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = ancill_var - 10 + expected_data = orig_data - 10 + self._check(result, expected_data, data_lazyness) + + def test_subtract_inplace(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + ancill_var -= 10 + expected_data = orig_data - 10 + self._check(ancill_var, expected_data, data_lazyness) + + def test_right_subtract(self): + for 
(ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = 10 - ancill_var + expected_data = 10 - orig_data + self._check(result, expected_data, data_lazyness) + + def test_multiply(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = ancill_var * 10 + expected_data = orig_data * 10 + self._check(result, expected_data, data_lazyness) + + def test_multiply_inplace(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + ancill_var *= 10 + expected_data = orig_data * 10 + self._check(ancill_var, expected_data, data_lazyness) + + def test_right_multiply(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = 10 * ancill_var + expected_data = 10 * orig_data + self._check(result, expected_data, data_lazyness) + + def test_divide(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = ancill_var / 10 + expected_data = orig_data / 10 + self._check(result, expected_data, data_lazyness) + + def test_divide_inplace(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + ancill_var /= 10 + expected_data = orig_data / 10 + self._check(ancill_var, expected_data, data_lazyness) + + def test_right_divide(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = 10 / ancill_var + expected_data = 10 / orig_data + self._check(result, expected_data, data_lazyness) + + def test_negative(self): + for (ancill_var, orig_data, data_lazyness) in self.test_combinations: + result = -ancill_var + expected_data = -orig_data + self._check(result, expected_data, data_lazyness) + + +class Test_has_bounds(tests.IrisTest): + def test(self): + ancillary_var = AncillaryVariable(np.array([2, 9, 5])) + self.assertFalse(ancillary_var.has_bounds()) + + +class Test_convert_units(tests.IrisTest): + def test_preserves_lazy(self): + test_data = np.array([[11.1, 12.2, 13.3], + [21.4, 22.5, 23.6]]) + lazy_data 
= as_lazy_data(test_data) + ancill_var = AncillaryVariable(data=lazy_data, units='m') + ancill_var.convert_units('ft') + self.assertTrue(ancill_var.has_lazy_data()) + test_data_ft = Unit('m').convert(test_data, 'ft') + self.assertArrayAllClose(ancill_var.data, test_data_ft) + + +class Test_is_compatible(tests.IrisTest): + def setUp(self): + self.ancill_var = AncillaryVariable( + [1., 8., 22.], + standard_name='number_of_observations', units='1') + self.modified_ancill_var = self.ancill_var.copy() + + def test_not_compatible_diff_name(self): + # Different name() - not compatible + self.modified_ancill_var.rename('air_temperature') + self.assertFalse( + self.ancill_var.is_compatible(self.modified_ancill_var)) + + def test_not_compatible_diff_units(self): + # Different units- not compatible + self.modified_ancill_var.units = 'm' + self.assertFalse( + self.ancill_var.is_compatible(self.modified_ancill_var)) + + def test_not_compatible_diff_common_attrs(self): + # Different common attributes - not compatible. + self.ancill_var.attributes['source'] = 'A' + self.modified_ancill_var.attributes['source'] = 'B' + self.assertFalse( + self.ancill_var.is_compatible(self.modified_ancill_var)) + + def test_compatible_diff_data(self): + # Different data values - compatible. + self.modified_ancill_var.data = [10., 20., 100.] + self.assertTrue( + self.ancill_var.is_compatible(self.modified_ancill_var)) + + def test_compatible_diff_var_name(self): + # Different var_name (but same name()) - compatible. + self.modified_ancill_var.var_name = 'obs_num' + self.assertTrue( + self.ancill_var.is_compatible(self.modified_ancill_var)) + + def test_compatible_diff_non_common_attributes(self): + # Different non-common attributes - compatible. 
+ self.ancill_var.attributes['source'] = 'A' + self.modified_ancill_var.attributes['origin'] = 'B' + self.assertTrue( + self.ancill_var.is_compatible(self.modified_ancill_var)) + + def test_compatible_ignore_common_attribute(self): + # ignore different common attributes - compatible. + self.ancill_var.attributes['source'] = 'A' + self.modified_ancill_var.attributes['source'] = 'B' + self.assertTrue( + self.ancill_var.is_compatible(self.modified_ancill_var, + ignore='source')) + + +class TestEquality(tests.IrisTest): + def test_nanpoints_eq_self(self): + av1 = AncillaryVariable([1., np.nan, 2.]) + self.assertEqual(av1, av1) + + def test_nanpoints_eq_copy(self): + av1 = AncillaryVariable([1., np.nan, 2.]) + av2 = av1.copy() + self.assertEqual(av1, av2) + + +if __name__ == '__main__': + tests.main() diff --git a/lib/iris/tests/unit/coords/test_CellMeasure.py b/lib/iris/tests/unit/coords/test_CellMeasure.py index bb839e86f9..9be50d811d 100644 --- a/lib/iris/tests/unit/coords/test_CellMeasure.py +++ b/lib/iris/tests/unit/coords/test_CellMeasure.py @@ -74,7 +74,7 @@ def test_set_data__lazy(self): def test_data_different_shape(self): new_vals = np.array((1., 2., 3.)) - msg = 'New data shape must match existing data shape.' + msg = 'Require data with shape.' with self.assertRaisesRegexp(ValueError, msg): self.measure.data = new_vals @@ -124,5 +124,6 @@ def test__repr__(self): def test__eq__(self): self.assertEqual(self.measure, self.measure) + if __name__ == '__main__': tests.main() diff --git a/lib/iris/tests/unit/coords/test_Coord.py b/lib/iris/tests/unit/coords/test_Coord.py index 314d990984..d397d8b00a 100644 --- a/lib/iris/tests/unit/coords/test_Coord.py +++ b/lib/iris/tests/unit/coords/test_Coord.py @@ -739,7 +739,7 @@ class Test_convert_units(tests.IrisTest): def test_convert_unknown_units(self): coord = iris.coords.AuxCoord(1, units='unknown') emsg = ('Cannot convert from unknown units. 
' - 'The "coord.units" attribute may be set directly.') + 'The "units" attribute may be set directly.') with self.assertRaisesRegexp(UnitConversionError, emsg): coord.convert_units('degrees') diff --git a/lib/iris/tests/unit/cube/test_Cube.py b/lib/iris/tests/unit/cube/test_Cube.py index ea2c3c2dfe..4e93e394f3 100644 --- a/lib/iris/tests/unit/cube/test_Cube.py +++ b/lib/iris/tests/unit/cube/test_Cube.py @@ -38,8 +38,9 @@ from iris.analysis import MEAN from iris.aux_factory import HybridHeightFactory from iris.cube import Cube -from iris.coords import AuxCoord, DimCoord, CellMeasure +from iris.coords import AuxCoord, DimCoord, CellMeasure, AncillaryVariable from iris.exceptions import (CoordinateNotFoundError, CellMeasureNotFoundError, + AncillaryVariableNotFoundError, UnitConversionError) from iris._lazy_data import as_lazy_data from iris.tests import mock @@ -423,6 +424,16 @@ def test_scalar_str_coord(self): summary = self.cube.summary() self.assertIn(str_value, summary) + def test_ancillary_variable(self): + cube = Cube(np.arange(6).reshape(2, 3)) + av = AncillaryVariable([1, 2], 'status_flag') + cube.add_ancillary_variable(av, 0) + expected_summary = \ + 'unknown / (unknown) (-- : 2; -- : 3)\n' \ + ' Ancillary Variables:\n' \ + ' status_flag x -' + self.assertEqual(cube.summary(), expected_summary) + class Test_is_compatible(tests.IrisTest): def setUp(self): @@ -1670,6 +1681,15 @@ def test_add_cell_measure(self): cube.add_cell_measure(a_cell_measure, [0, 1]) self.assertEqual(cube.cell_measure('area'), a_cell_measure) + def test_add_ancillary_variable(self): + cube = Cube(np.arange(6).reshape(2, 3)) + ancillary_variable = AncillaryVariable( + data=np.arange(6).reshape(2, 3), + long_name='detection quality') + cube.add_ancillary_variable(ancillary_variable, [0, 1]) + self.assertEqual(cube.ancillary_variable('detection quality'), + ancillary_variable) + def test_add_valid_aux_factory(self): cube = Cube(np.arange(8).reshape(2, 2, 2)) delta = AuxCoord(points=[0, 1], 
long_name='delta', units='m') @@ -1712,6 +1732,10 @@ def setUp(self): measure='area') cube.add_cell_measure(a_cell_measure, [0, 1]) cube.add_cell_measure(self.b_cell_measure, [0, 1]) + ancillary_variable = AncillaryVariable( + data=np.arange(6).reshape(2, 3), + long_name='Quality of Detection') + cube.add_ancillary_variable(ancillary_variable, [0, 1]) self.cube = cube def test_remove_dim_coord(self): @@ -1736,6 +1760,11 @@ def test_fail_remove_cell_measure_by_name(self): with self.assertRaises(CellMeasureNotFoundError): self.cube.remove_cell_measure('notarea') + def test_remove_ancilliary_variable(self): + self.cube.remove_ancillary_variable( + self.cube.ancillary_variable('Quality of Detection')) + self.assertEqual(self.cube._ancillary_variables_and_dims, []) + class Test__getitem_CellMeasure(tests.IrisTest): def setUp(self): @@ -1767,6 +1796,50 @@ def test_cell_measure_1d(self): result.cell_measures()[0].data.shape) +class TestAncillaryVariables(tests.IrisTest): + def setUp(self): + cube = Cube(10 * np.arange(6).reshape(2, 3)) + self.ancill_var = AncillaryVariable( + np.arange(6).reshape(2, 3), + standard_name='number_of_observations', units='1') + cube.add_ancillary_variable(self.ancill_var, [0, 1]) + self.cube = cube + + def test_get_ancillary_variable(self): + ancill_var = self.cube.ancillary_variable('number_of_observations') + self.assertEqual(ancill_var, self.ancill_var) + + def test_get_ancillary_variables(self): + ancill_vars = self.cube.ancillary_variables('number_of_observations') + self.assertEqual(len(ancill_vars), 1) + self.assertEqual(ancill_vars[0], self.ancill_var) + + def test_get_ancillary_variable_obj(self): + ancill_vars = self.cube.ancillary_variables(self.ancill_var) + self.assertEqual(len(ancill_vars), 1) + self.assertEqual(ancill_vars[0], self.ancill_var) + + def test_fail_get_ancillary_variables(self): + with self.assertRaises(AncillaryVariableNotFoundError): + self.cube.ancillary_variable('other_ancill_var') + + def 
test_fail_get_ancillary_variables_obj(self): + ancillary_variable = self.ancill_var.copy() + ancillary_variable.long_name = 'Number of observations at site' + with self.assertRaises(AncillaryVariableNotFoundError): + self.cube.ancillary_variable(ancillary_variable) + + def test_ancillary_variable_dims(self): + ancill_var_dims = self.cube.ancillary_variable_dims(self.ancill_var) + self.assertEqual(ancill_var_dims, (0, 1)) + + def test_fail_ancill_variable_dims(self): + ancillary_variable = self.ancill_var.copy() + ancillary_variable.long_name = 'Number of observations at site' + with self.assertRaises(AncillaryVariableNotFoundError): + self.cube.ancillary_variable_dims(ancillary_variable) + + class TestCellMeasures(tests.IrisTest): def setUp(self): cube = Cube(np.arange(6).reshape(2, 3)) @@ -1886,6 +1959,14 @@ def test_cell_measures(self): self.assertEqual(self.cube._cell_measures_and_dims, [(area_cm, (2, 0))]) + def test_ancillary_variables(self): + ancill_var = AncillaryVariable(data=np.arange(8).reshape(2, 4), + long_name='instrument error') + self.cube.add_ancillary_variable(ancill_var, (1, 2)) + self.cube.transpose() + self.assertEqual(self.cube._ancillary_variables_and_dims, + [(ancill_var, (1, 0))]) + class Test_convert_units(tests.IrisTest): def test_convert_unknown_units(self):