diff --git a/cf/data/dask_utils.py b/cf/data/dask_utils.py index d858b8ecf1..cc0df7da58 100644 --- a/cf/data/dask_utils.py +++ b/cf/data/dask_utils.py @@ -10,6 +10,7 @@ import dask.array as da import numpy as np +from ..cfdatetime import dt2rt, rt2dt from ..functions import atol as cf_atol from ..functions import rtol as cf_rtol @@ -439,3 +440,116 @@ def cf_where(array, condition, x, y, hardmask): array.harden_mask() return array + + +def _getattr(x, attr): + return getattr(x, attr, False) + + +_array_getattr = np.vectorize(_getattr, excluded="attr") + + +def cf_YMDhms(a, attr): + """Return a date-time component from an array of date-time objects. + + Only applicable for data with reference time units. The returned + array will have the same mask hardness as the original array. + + .. versionadded:: TODODASK + + .. seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.day`, + `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` + + :Parameters: + + a: `numpy.ndarray` + The array from which to extract the date-time component. + + attr: `str` + The name of the date-time component, one of ``'year'``, + ``'month'``, ``'day'``, ``'hour'``, ``'minute'``, + ``'second'``. + + :Returns: + + `numpy.ndarray` + The date-time component. + + **Examples** + + >>> import numpy as np + >>> a = np.array([ + ... cftime.DatetimeGregorian(2000, 1, 1, 0, 0, 0, 0, has_year_zero=False), + ... cftime.DatetimeGregorian(2000, 1, 2, 0, 0, 0, 0, has_year_zero=False) + ... ]) + >>> cf_YMDhms(a, 'day') + array([1, 2]) + + """ + return _array_getattr(a, attr=attr) + + +def cf_rt2dt(a, units): + """Convert an array of reference times to date-time objects. + + .. versionadded:: TODODASK + + .. seealso:: `cf._dt2rt`, `cf.Data._asdatetime` + + :Parameters: + + a: `numpy.ndarray` + An array of numeric reference times. + + units: `Units` + The units for the reference times. + + + :Returns: + + `numpy.ndarray` + An array containing date-time objects. 
+ + **Examples** + + >>> import numpy as np + >>> print(cf_rt2dt(np.array([0, 1]), cf.Units('days since 2000-01-01'))) + [cftime.DatetimeGregorian(2000, 1, 1, 0, 0, 0, 0, has_year_zero=False) + cftime.DatetimeGregorian(2000, 1, 2, 0, 0, 0, 0, has_year_zero=False)] + + """ + return rt2dt(a, units_in=units) + + +def cf_dt2rt(a, units): + """Convert an array of date-time objects to reference times. + + .. versionadded:: TODODASK + + .. seealso:: `cf._rt2dt`, `cf.Data._asreftime` + + :Parameters: + + a: `numpy.ndarray` + An array of date-time objects. + + units: `Units` + The units for the reference times. + + :Returns: + + `numpy.ndarray` + An array containing numeric reference times. + + **Examples** + + >>> import numpy as np + >>> a = np.array([ + ... cftime.DatetimeGregorian(2000, 1, 1, 0, 0, 0, 0, has_year_zero=False), + ... cftime.DatetimeGregorian(2000, 1, 2, 0, 0, 0, 0, has_year_zero=False) + ... ]) + >>> print(cf_dt2rt(a, cf.Units('days since 1999-01-01'))) + [365 366] + + """ + return dt2rt(a, units_out=units, units_in=None) diff --git a/cf/data/data.py b/cf/data/data.py index fb5c483265..ce5cabb5f4 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -20,7 +20,6 @@ from numpy.testing import suppress_warnings as numpy_testing_suppress_warnings from ..cfdatetime import dt as cf_dt -from ..cfdatetime import dt2rt, rt2dt # , st2rt from ..constants import masked as cf_masked from ..decorators import ( _deprecated_kwarg_check, @@ -105,8 +104,10 @@ ) from .dask_utils import ( _da_ma_allclose, + cf_dt2rt, cf_harden_mask, cf_percentile, + cf_rt2dt, cf_soften_mask, cf_where, ) @@ -115,6 +116,7 @@ from .partition import Partition from .partitionmatrix import PartitionMatrix from .utils import ( # is_small,; is_very_small, + YMDhms, _is_numeric_dtype, conform_units, convert_to_datetime, @@ -1475,9 +1477,10 @@ def _del_dask(self, default=ValueError(), delete_source=True): def _map_blocks(self, func, **kwargs): """Apply a function to the data in-place. - .. 
note:: This method does not reset the mask hardness. It may - be necessary for a call to `_map_blocks` to be - followed by a call to `_reset_mask_hardness`. + .. warning:: This method **does not reset the mask + hardness**. It may be necessary for a call to + `_map_blocks` to be followed by a call to + `_reset_mask_hardness` (or equivalent). .. versionadded:: TODODASK @@ -3298,6 +3301,7 @@ def rechunk( return d + @daskified(_DASKIFIED_VERBOSE) @_inplace_enabled(default=False) def _asdatetime(self, inplace=False): """Change the internal representation of data array elements @@ -3324,46 +3328,37 @@ def _asdatetime(self, inplace=False): `Data` or `None` - **Examples:** + **Examples** - >>> d._asdatetime() + >>> d = cf.Data([[1.93, 5.17]], "days since 2000-12-29") + >>> e = d._asdatetime() + >>> print(e.array) + [[cftime.DatetimeGregorian(2000, 12, 30, 22, 19, 12, 0, has_year_zero=False) + cftime.DatetimeGregorian(2001, 1, 3, 4, 4, 48, 0, has_year_zero=False)]] + >>> f = e._asreftime() + >>> print(f.array) + [[1.93 5.17]] """ d = _inplace_enabled_define_and_cleanup(self) - units = self.Units + units = d.Units if not units.isreftime: raise ValueError( - "Can't convert {!r} data to date-time objects".format(units) + f"Can't convert {units!r} values to date-time objects" ) - if d._isdatetime(): - if inplace: - d = None - return d - - config = d.partition_configuration( - readonly=False, func=rt2dt, dtype=None - ) - - for partition in d.partitions.matrix.flat: - partition.open(config) - array = partition.array - p_units = partition.Units - partition.Units = Units(p_units.units, p_units._utime.calendar) - partition.close() - - d.Units = Units(units.units, units._utime.calendar) - - d._dtype = array.dtype + if not d._isdatetime(): + d._map_blocks(cf_rt2dt, units=units, dtype=object) return d + @daskified(_DASKIFIED_VERBOSE) def _isdatetime(self): - """True if the internal representation is a datetime-like - object.""" + """True if the internal representation is a datetime 
object.""" return self.dtype.kind == "O" and self.Units.isreftime + @daskified(_DASKIFIED_VERBOSE) @_inplace_enabled(default=False) def _asreftime(self, inplace=False): """Change the internal representation of data array elements @@ -3388,40 +3383,28 @@ def _asreftime(self, inplace=False): `Data` or `None` - **Examples:** + **Examples** - >>> d._asreftime() + >>> d = cf.Data([[1.93, 5.17]], "days since 2000-12-29") + >>> e = d._asdatetime() + >>> print(e.array) + [[cftime.DatetimeGregorian(2000, 12, 30, 22, 19, 12, 0, has_year_zero=False) + cftime.DatetimeGregorian(2001, 1, 3, 4, 4, 48, 0, has_year_zero=False)]] + >>> f = e._asreftime() + >>> print(f.array) + [[1.93 5.17]] """ d = _inplace_enabled_define_and_cleanup(self) - units = d.Units - - if not d._isdatetime(): - if units.isreftime: - if inplace: - d = None - return d - else: - raise ValueError( - "Can't convert {!r} data to numeric reference " - "times".format(units) - ) - # --- End: if - - config = d.partition_configuration( - readonly=False, func=dt2rt, dtype=None - ) - for partition in d.partitions.matrix.flat: - partition.open(config) - array = partition.array - p_units = partition.Units - partition.Units = Units(p_units.units, p_units._utime.calendar) - partition.close() - - d.Units = Units(units.units, units._utime.calendar) + units = d.Units + if not units.isreftime: + raise ValueError( + f"Can't convert {units!r} values to numeric reference times" + ) - d._dtype = array.dtype + if d._isdatetime(): + d._map_blocks(cf_dt2rt, units=units, dtype=float) return d @@ -8793,181 +8776,137 @@ def cyclic(self, axes=None, iscyclic=True): return old - def _YMDhms(self, attr): - """Provides datetime components of the data array elements. - - .. seealso:: `~cf.Data.year`, ~cf.Data.month`, `~cf.Data.day`, - `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` - - """ - - def _func(array, units_in, dummy0, dummy1): - """The returned array is always independent. 
- - :Parameters: - - array: numpy array - - units_in: `Units` - - dummy0: - Ignored. - - dummy1: - Ignored. - - :Returns: - - numpy array - - """ - if not self._isdatetime(): - array = rt2dt(array, units_in) - - return _array_getattr(array, attr) - - # --- End: def - - if not self.Units.isreftime: - raise ValueError( - "Can't get {}s from data with {!r}".format(attr, self.Units) - ) - - new = self.copy() - - new._Units = _units_None - - config = new.partition_configuration( - readonly=False, func=_func, dtype=None - ) - - for partition in new.partitions.matrix.flat: - partition.open(config) - array = partition.array - new_dtype = array.dtype - partition.close() - - new._dtype = new_dtype - - return new - @property def year(self): - """The year of each data array element. + """The year of each date-time value. - Only applicable for reference time units. + Only applicable for data with reference time units. The + returned `Data` will have the same mask hardness as the + original array. .. seealso:: `~cf.Data.month`, `~cf.Data.day`, `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` - **Examples:** + **Examples** >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d - + >>> d.year - + """ - return self._YMDhms("year") + return YMDhms(self, "year") @property def month(self): - """The month of each data array element. + """The month of each date-time value. - Only applicable for reference time units. + Only applicable for data with reference time units. The + returned `Data` will have the same mask hardness as the + original array. .. seealso:: `~cf.Data.year`, `~cf.Data.day`, `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` - **Examples:** + **Examples** >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d - + >>> d.month - + """ - return self._YMDhms("month") + return YMDhms(self, "month") @property def day(self): - """The day of each data array element. + """The day of each date-time value. - Only applicable for reference time units. 
+ Only applicable for data with reference time units. The + returned `Data` will have the same mask hardness as the + original array. .. seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` - **Examples:** + **Examples** >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d - + >>> d.day - + """ - return self._YMDhms("day") + return YMDhms(self, "day") @property def hour(self): - """The hour of each data array element. + """The hour of each date-time value. - Only applicable for reference time units. + Only applicable for data with reference time units. The + returned `Data` will have the same mask hardness as the + original array. .. seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.day`, `~cf.Data.minute`, `~cf.Data.second` - **Examples:** + **Examples** >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d - + >>> d.hour - + """ - return self._YMDhms("hour") + return YMDhms(self, "hour") @property def minute(self): - """The minute of each data array element. + """The minute of each date-time value. - Only applicable for reference time units. + Only applicable for data with reference time units. The + returned `Data` will have the same mask hardness as the + original array. .. seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.day`, `~cf.Data.hour`, `~cf.Data.second` - **Examples:** + **Examples** >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d - + >>> d.minute - + """ - return self._YMDhms("minute") + return YMDhms(self, "minute") @property def second(self): - """The second of each data array element. + """The second of each date-time value. - Only applicable for reference time units. + Only applicable for data with reference time units. The + returned `Data` will have the same mask hardness as the + original array. .. 
seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.day`, `~cf.Data.hour`, `~cf.Data.minute` + **Examples** + >>> d = cf.Data([[1.93, 5.17]], 'days since 2000-12-29') >>> d - + >>> d.second - + """ - return self._YMDhms("second") + return YMDhms(self, "second") @_inplace_enabled(default=False) def uncompress(self, inplace=False): @@ -13627,18 +13566,6 @@ def _overlapping_partitions(partitions, indices, axes, master_flip): return new_partition_matrix -# -------------------------------------------------------------------- -# ??? -# -------------------------------------------------------------------- -def _getattr(x, attr): - if not x: - return False - return getattr(x, attr) - - -_array_getattr = np.vectorize(_getattr) - - def _broadcast(a, shape): """Broadcast an array to a given shape. diff --git a/cf/data/utils.py b/cf/data/utils.py index 3d3c935db9..cda7f77c6e 100644 --- a/cf/data/utils.py +++ b/cf/data/utils.py @@ -8,6 +8,7 @@ from ..cfdatetime import dt as cf_dt from ..cfdatetime import dt2rt, rt2dt, st2rt from ..units import Units +from .dask_utils import cf_YMDhms def _is_numeric_dtype(array): @@ -493,7 +494,7 @@ def conform_units(value, units): units: `Units` The units to conform to. - **Examples:** + **Examples** >>> conform_units(1, cf.Units('metres')) 1 @@ -530,3 +531,46 @@ def conform_units(value, units): ) return value + + +def YMDhms(d, attr): + """Return a date-time component of the data. + + Only applicable for data with reference time units. The returned + `Data` will have the same mask hardness as the original array. + + .. versionadded:: TODODASK + + .. seealso:: `~cf.Data.year`, `~cf.Data.month`, `~cf.Data.day`, + `~cf.Data.hour`, `~cf.Data.minute`, `~cf.Data.second` + + :Parameters: + + d: `Data` + The data from which to extract the date-time component. + + attr: `str` + The name of the date-time component, one of ``'year'``, + ``'month'``, ``'day'``, ``'hour'``, ``'minute'``, + ``'second'``. 
+ + :Returns: + + `Data` + The date-time component. + + **Examples** + + >>> d = cf.Data([0, 1, 2], 'days since 1999-12-31') + >>> YMDhms(d, 'year').array + array([1999, 2000, 2000]) + + """ + units = d.Units + if not units.isreftime: + raise ValueError(f"Can't get {attr}s from data with {units!r}") + + d = d._asdatetime() + d._map_blocks(partial(cf_YMDhms, attr=attr), dtype=int) + d.override_units(Units(None), inplace=True) + return d diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index edf0228bcd..9d2eefd14d 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -1642,30 +1642,24 @@ def test_Data_datetime_array(self): ).all() ) - @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'") - def test_Data__asdatetime__asreftime__isdatetime(self): + def test_Data_asdatetime_asreftime_isdatetime(self): if self.test_only and inspect.stack()[0][3] not in self.test_only: return d = cf.Data([[1.93, 5.17]], "days since 2000-12-29") - self.assertEqual(d.dtype, np.dtype(float)) self.assertFalse(d._isdatetime()) - self.assertIsNone(d._asreftime(inplace=True)) - self.assertEqual(d.dtype, np.dtype(float)) self.assertFalse(d._isdatetime()) - self.assertIsNone(d._asdatetime(inplace=True)) - self.assertEqual(d.dtype, np.dtype(object)) - self.assertTrue(d._isdatetime()) + e = d._asdatetime() + self.assertTrue(e._isdatetime()) + self.assertEqual(e.dtype, np.dtype(object)) + self.assertIsNone(e._asdatetime(inplace=True)) + self.assertTrue(e._isdatetime()) - self.assertIsNone(d._asdatetime(inplace=True)) - self.assertEqual(d.dtype, np.dtype(object)) - self.assertTrue(d._isdatetime()) - - self.assertIsNone(d._asreftime(inplace=True)) - self.assertEqual(d.dtype, np.dtype(float)) - self.assertFalse(d._isdatetime()) + # Round trip + f = e._asreftime() + self.assertTrue(f.equals(d)) def test_Data_ceil(self): if self.test_only and inspect.stack()[0][3] not in self.test_only: @@ -1992,7 +1986,6 @@ def test_Data_varray(self): v[0, 0, 0, 0] = 0 
self.assertTrue((v == b).all()) - @unittest.skipIf(TEST_DASKIFIED_ONLY, "no attr. 'partition_configuration'") def test_Data_year_month_day_hour_minute_second(self): if self.test_only and inspect.stack()[0][3] not in self.test_only: return @@ -2015,6 +2008,10 @@ def test_Data_year_month_day_hour_minute_second(self): self.assertTrue(d.minute.equals(cf.Data([[37, 25]]))) self.assertTrue(d.second.equals(cf.Data([[26, 26]]))) + # Can't get year from data with non-reference time units + with self.assertRaises(ValueError): + cf.Data([[1, 2]], units="m").year + @unittest.skipIf(TEST_DASKIFIED_ONLY, "'NoneType' is not iterable") def test_Data_BINARY_AND_UNARY_OPERATORS(self): if self.test_only and inspect.stack()[0][3] not in self.test_only: