From afddae6e314556c52dd5ffb43cc812b383f2a79f Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Thu, 19 Dec 2024 14:34:34 +1300 Subject: [PATCH 01/18] Add time selection between dynamic doy --- xclim/core/calendar.py | 132 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 117 insertions(+), 15 deletions(-) diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 14b47084f..3a0a6caf3 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -1078,12 +1078,124 @@ def days_since_to_doy( return out.convert_calendar(base_calendar).rename(da.name) +def _get_doys(start: int, end: int, inclusive: tuple[bool, bool]): + """Get the day of year list from start to end. + + Parameters + ---------- + start : int + Start day of year. + end : int + End day of year. + inclusive : 2-tuple of booleans + Whether the bounds should be inclusive or not. + + Returns + ------- + np.ndarray + Array of day of year between the start and end. + """ + if start <= end: + doys = np.arange(start, end + 1) + else: + doys = np.concatenate((np.arange(start, 367), np.arange(0, end + 1))) + if not inclusive[0]: + doys = doys[1:] + if not inclusive[1]: + doys = doys[:-1] + return doys + + +def mask_between_doys( + da: xr.DataArray, + doy_bounds: tuple[int | xr.DataArray, int | xr.DataArray], + include_bounds: tuple[bool, bool] = [True, True], +) -> xr.DataArray | xr.Dataset: + """Mask the data outside the day of year bounds. + + Parameters + ---------- + da : xr.DataArray or xr.Dataset + Input data. + doy_bounds : 2-tuple of integers or xr.DataArray + The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from + 1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given, + the int day-of-year corresponds to the year of the xr.DataArray. + include_bounds : 2-tuple of booleans + Whether the bounds of `doy_bounds` should be inclusive or not. 
+ + Returns + ------- + xr.DataArray or xr.Dataset + Boolean mask array with the same shape as `da` with True value inside the period of + interest and False outside. + """ + if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): + mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, [True, True])) + + else: + cal = get_calendar(da, dim="time") + + start, end = doy_bounds + if isinstance(start, int): + start = xr.where(end.isnull(), np.nan, start) + start = start.convert_calendar(cal) + start.attrs["calendar"] = cal + else: + start = start.convert_calendar(cal) + start.attrs["calendar"] = cal + start = doy_to_days_since(start) + + if isinstance(end, int): + end = xr.where(start.isnull(), np.nan, end) + end = end.convert_calendar(cal) + end.attrs["calendar"] = cal + else: + end = end.convert_calendar(cal) + end.attrs["calendar"] = cal + end = doy_to_days_since(end) + + freq = xr.infer_freq(start.time) + out = [] + for base_time, indexes in da.resample(time=freq).groups.items(): + # get group slice + group = da.isel(time=indexes) + + if base_time in start.time: + start_d = start.sel(time=base_time) + else: + start_d = None + if base_time in end.time: + end_d = end.sel(time=base_time) + else: + end_d = None + + if start_d is not None and end_d is not None: + if not include_bounds[0]: + start_d += 1 + if not include_bounds[1]: + end_d -= 1 + + # select days between start and end for group + days = (group.time - base_time).dt.days + days[days < 0] = np.nan + + mask = (days >= start_d) & (days <= end_d) + else: + # Get an array with the good shape and put False + mask = xr.where(group.isel(time=0), False, False) + + out.append(mask) + mask = xr.concat(out, dim="time") + return mask + + def select_time( da: xr.DataArray | xr.Dataset, drop: bool = False, season: str | Sequence[str] | None = None, month: int | Sequence[int] | None = None, - doy_bounds: tuple[int, int] | None = None, + doy_bounds: tuple[int | xr.DataArray, int | xr.DataArray] | None = None, 
date_bounds: tuple[str, str] | None = None, include_bounds: bool | tuple[bool, bool] = True, ) -> DataType: @@ -1104,9 +1216,10 @@ def select_time( One or more of 'DJF', 'MAM', 'JJA' and 'SON'. month : integer or sequence of integers, optional Sequence of month numbers (January = 1 ... December = 12) - doy_bounds : 2-tuple of integers, optional + doy_bounds : 2-tuple of integers or xr.DataArray, optional The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from - 1 (January 1st) to 365 or 366 (December 31st). + 1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given, + the int day-of-year corresponds to the year of the xr.DataArray. If calendar awareness is needed, consider using ``date_bounds`` instead. date_bounds : 2-tuple of strings, optional The bounds as (start, end) of the period of interest expressed as dates in the month-day (%m-%d) format. @@ -1148,17 +1261,6 @@ def select_time( if N == 0: return da - def _get_doys(_start, _end, _inclusive): - if _start <= _end: - _doys = np.arange(_start, _end + 1) - else: - _doys = np.concatenate((np.arange(_start, 367), np.arange(0, _end + 1))) - if not _inclusive[0]: - _doys = _doys[1:] - if not _inclusive[1]: - _doys = _doys[:-1] - return _doys - if isinstance(include_bounds, bool): include_bounds = (include_bounds, include_bounds) @@ -1173,7 +1275,7 @@ def _get_doys(_start, _end, _inclusive): mask = da.time.dt.month.isin(month) elif doy_bounds is not None: - mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds)) + mask = mask_between_doys(da, doy_bounds, include_bounds) elif date_bounds is not None: # This one is a bit trickier. 
From 5ec7ec657638adb8ec9fb1e4f0de1d823a541ec6 Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Mon, 20 Jan 2025 16:21:31 +1300 Subject: [PATCH 02/18] Fix the inclusion of bounds for int doys --- src/xclim/core/calendar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index 0de782d27..b7d09bcaf 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1171,7 +1171,7 @@ def mask_between_doys( interest and False outside. """ if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): - mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, [True, True])) + mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds)) else: cal = get_calendar(da, dim="time") From ac76379e9aa66128663cc54acae6a514949cfb66 Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Wed, 22 Jan 2025 15:55:35 +1300 Subject: [PATCH 03/18] Add details to AUTHORS.rst --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 14cb18707..0fd1dc9fa 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -48,3 +48,4 @@ Contributors * Adrien Lamarche `@LamAdr `_ * Faisal Mahmood `@faimahsho `_ * Sebastian Lehner `@seblehner `_ +* Baptiste Hamon `@baptistehamon `_ From 2e40c9f0b6baa628718f3e72e20f43c3002e330b Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Wed, 22 Jan 2025 16:15:18 +1300 Subject: [PATCH 04/18] Add details to zenodo file --- .zenodo.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.zenodo.json b/.zenodo.json index bf4fc7f19..e53f692ac 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -153,6 +153,11 @@ "name": "Lehner, Sebastian", "affiliation": "GeoSphere Austria, Vienna, Austria", "orcid": "0000-0002-7562-8172" + }, + { + "name": "Hamon, Baptiste", + "affiliation": "University of Canterbury, Christchurch, New Zealand", + "orcid": "0009-0007-4530-9772" } ], "keywords": [ From 0c815b9429760d1935c1be09ebe80a8fd66c3607 Mon Sep 17 
00:00:00 2001 From: Baptiste Hamon Date: Tue, 28 Jan 2025 13:39:24 +1300 Subject: [PATCH 05/18] Add a check to ensure that doy bounds have same freq in mask_between_doys --- src/xclim/core/calendar.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index b7d09bcaf..471fffae2 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1195,7 +1195,21 @@ def mask_between_doys( end.attrs["calendar"] = cal end = doy_to_days_since(end) - freq = xr.infer_freq(start.time) + freq = [] + for bound in [start, end]: + try: + freq.append(xr.infer_freq(bound.time)) + except ValueError: + freq.append(None) + freq = set(freq) - {None} + if len(freq) != 1: + raise ValueError( + f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {freq}." + " Please consider providing `freq` manually." + ) + else: + freq = freq.pop() + out = [] for base_time, indexes in da.resample(time=freq).groups.items(): # get group slice From c52cd63b259b4ead6b2736a523a1ed683ffb984d Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Tue, 28 Jan 2025 14:44:43 +1300 Subject: [PATCH 06/18] Fix bug when masking between doys on dataset --- src/xclim/core/calendar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index 471fffae2..69327e9cd 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1237,7 +1237,8 @@ def mask_between_doys( mask = (days >= start_d) & (days <= end_d) else: # Get an array with the good shape and put False - mask = xr.where(group.isel(time=0), False, False) + mask = start.isel(time=0).drop_vars("time").expand_dims(time=group.time) + mask = xr.full_like(mask, False) out.append(mask) mask = xr.concat(out, dim="time") From 36b1862afd2732d45f09ad68b7c9f9e2699e2ac9 Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Tue, 28 Jan 2025 14:56:58 +1300 Subject: 
[PATCH 07/18] Edit freq error message in mask_between_doys --- src/xclim/core/calendar.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index 69327e9cd..e3790ddb2 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1205,7 +1205,6 @@ def mask_between_doys( if len(freq) != 1: raise ValueError( f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {freq}." - " Please consider providing `freq` manually." ) else: freq = freq.pop() From ac20249ee8524fec32524ab5a5abbb9aa189f3bc Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 31 Jan 2025 17:54:43 -0500 Subject: [PATCH 08/18] Rewrite mask_between_doys to add spatial dims support --- src/xclim/core/calendar.py | 144 ++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 74 deletions(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index e3790ddb2..bbb579cd3 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1037,7 +1037,7 @@ def doy_to_days_since( # 2cases: # val is a day in the same year as its index : da - offset # val is a day in the next year : da + doy_max - offset - out = xr.where(dac > base_doy, dac, dac + doy_max) - start_doy + out = xr.where(dac >= base_doy, dac, dac + doy_max) - start_doy out.attrs.update(da.attrs) if start is not None: out.attrs.update(units=f"days after {start}") @@ -1156,91 +1156,87 @@ def mask_between_doys( Parameters ---------- da : xr.DataArray or xr.Dataset - Input data. - doy_bounds : 2-tuple of integers or xr.DataArray + Input data. It must have a time coordinate. + doy_bounds : 2-tuple of integers or DataArray The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from - 1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given, - the int day-of-year corresponds to the year of the xr.DataArray. 
+ 1 (January 1st) to 365 or 366 (December 31st). + If DataArrays are passed, they must have the same coordinates on the dimensions they share. + They may have a time dimension, in which case the masking is done independently for each period defined by the coordinate, + which means the time coordinate must have an inferable frequency (see :py:func:`xr.infer_freq`). + Timesteps of the input not appearing in the time coordinate of the bounds are masked as "outside the bounds". + Missing values (nan) in the bounds are treated as an open bound (same as a None in a slice). include_bounds : 2-tuple of booleans Whether the bounds of `doy_bounds` should be inclusive or not. Returns ------- - xr.DataArray or xr.Dataset - Boolean mask array with the same shape as `da` with True value inside the period of - interest and False outside. + xr.DataArray + Boolean array with the same time coordinate as `da` and any other dimension present on the bounds. + True value inside the period of interest and False outside. 
""" - if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): + if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): # Simple case mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds)) - else: - cal = get_calendar(da, dim="time") - start, end = doy_bounds + # convert ints to DataArrays if isinstance(start, int): - start = xr.where(end.isnull(), np.nan, start) - start = start.convert_calendar(cal) - start.attrs["calendar"] = cal - else: - start = start.convert_calendar(cal) - start.attrs["calendar"] = cal - start = doy_to_days_since(start) - - if isinstance(end, int): - end = xr.where(start.isnull(), np.nan, end) - end = end.convert_calendar(cal) - end.attrs["calendar"] = cal - else: - end = end.convert_calendar(cal) - end.attrs["calendar"] = cal - end = doy_to_days_since(end) - - freq = [] - for bound in [start, end]: - try: - freq.append(xr.infer_freq(bound.time)) - except ValueError: - freq.append(None) - freq = set(freq) - {None} - if len(freq) != 1: - raise ValueError( - f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {freq}." 
+ start = xr.full_like(end, start) + elif isinstance(end, int): + end = xr.full_like(start, end) + # Ensure they both have the same dims + # align join='exact' will fail on common but different coords, broadcast will add missing coords + start, end = xr.broadcast(*xr.align(start, end, join="exact")) + + if not include_bounds[0]: + start += 1 + if not include_bounds[1]: + end -= 1 + + if "time" in start.dims: + freq = xr.infer_freq(start.time) + # Convert the doy bounds to a duration since the beginning of each period defined in the bound's time coordinate + # Also ensures the bounds share the same time calendar as the input + # Any missing value is replaced with the min/max of possible values + calkws = dict( + calendar=da.time.dt.calendar, use_cftime=(da.time.dtype == "O") + ) + start = doy_to_days_since(start.convert_calendar(**calkws)).fillna(0) + end = doy_to_days_since(end.convert_calendar(**calkws)).fillna(366) + + out = [] + # For each period, mask the days since between start and end + for base_time, indexes in da.resample(time=freq).groups.items(): + group = da.isel(time=indexes) + + if base_time in start.time: + start_d = start.sel(time=base_time) + end_d = end.sel(time=base_time) + + # select days between start and end for group + days = (group.time - base_time).dt.days + days = days.where(days >= 0) + mask = (days >= start_d) & (days <= end_d) + else: # This group has no defined bounds : put False in the mask + # Array with the same shape as the "mask" in the other case : broadcast of time and bounds dims + template = xr.broadcast( + group.time.dt.day, start.isel(time=0, drop=True) + )[0] + mask = xr.full_like(template, False, dtype="bool") + out.append(mask) + mask = xr.concat(out, dim="time") + else: # Only "Spatial" dims, we can't constrain as in days since, so there are two cases + doys = da.time.dt.dayofyear # for readability + # Any missing value is replaced with the min/max of possible values + start = start.fillna(1) + end = end.fillna(366) + 
mask = xr.where( + start <= end, + (doys >= start) + & (doys <= end), # case 1 : start <= end, ROI is within a calendar year + ~( + (doys >= end) & (doys <= start) + ), # case 2 : start > end, ROI crosses the new year ) - else: - freq = freq.pop() - - out = [] - for base_time, indexes in da.resample(time=freq).groups.items(): - # get group slice - group = da.isel(time=indexes) - - if base_time in start.time: - start_d = start.sel(time=base_time) - else: - start_d = None - if base_time in end.time: - end_d = end.sel(time=base_time) - else: - end_d = None - - if start_d is not None and end_d is not None: - if not include_bounds[0]: - start_d += 1 - if not include_bounds[1]: - end_d -= 1 - - # select days between start and end for group - days = (group.time - base_time).dt.days - days[days < 0] = np.nan - - mask = (days >= start_d) & (days <= end_d) - else: - # Get an array with the good shape and put False - mask = start.isel(time=0).drop_vars("time").expand_dims(time=group.time) - mask = xr.full_like(mask, False) - - out.append(mask) - mask = xr.concat(out, dim="time") return mask From 5c91ef33984717f90f5fa0635ce43604085c8c18 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:42:50 +0000 Subject: [PATCH 09/18] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/xclim/core/calendar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index bbb579cd3..61881924e 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1118,7 +1118,8 @@ def days_since_to_doy( def _get_doys(start: int, end: int, inclusive: tuple[bool, bool]): - """Get the day of year list from start to end. + """ + Get the day of year list from start to end. 
Parameters ---------- From 775f9ed83ac4520fc9f887018f3df0479be4ece2 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 11 Feb 2025 13:56:14 -0500 Subject: [PATCH 10/18] Add tests - fix doc - fix end < start case for bounds --- src/xclim/core/calendar.py | 12 +++++----- tests/test_generic.py | 48 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index 61881924e..b7e23c6b8 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1165,7 +1165,8 @@ def mask_between_doys( They may have a time dimension, in which case the masking is done independently for each period defined by the coordinate, which means the time coordinate must have an inferable frequency (see :py:func:`xr.infer_freq`). Timesteps of the input not appearing in the time coordinate of the bounds are masked as "outside the bounds". - Missing values (nan) in the bounds are treated as an open bound (same as a None in a slice). + Missing values (nan) in the start and end bounds default to 1 and 366 respectively in the non-temporal case + and to open bounds (the start and end of the period) in the temporal case. include_bounds : 2-tuple of booleans Whether the bounds of `doy_bounds` should be inclusive or not. 
@@ -1232,11 +1233,10 @@ def mask_between_doys( end = end.fillna(366) mask = xr.where( start <= end, - (doys >= start) - & (doys <= end), # case 1 : start <= end, ROI is within a calendar year - ~( - (doys >= end) & (doys <= start) - ), # case 2 : start > end, ROI crosses the new year + # case 1 : start <= end, ROI is within a calendar year + (doys >= start) & (doys <= end), + # case 2 : start > end, ROI crosses the new year + ~((doys > end) & (doys < start)), ) return mask diff --git a/tests/test_generic.py b/tests/test_generic.py index 8191624a9..203cf31df 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -624,6 +624,54 @@ def test_select_time_doys(self): ) xr.testing.assert_equal(out, exp) + def test_select_time_doys_2D_spatial(self): + # first doy of da is 44, last is 366 + da = self.series("2003-02-13", "2004-12-31", "default").expand_dims( + lat=[0, 10, 15, 20, 25] + ) + # 5 cases + # normal : start < end + # over NYE : end < start + # end is nan (i.e. 366) + # start is nan (i.e. 1) + # both are nan (no drop) + start = xr.DataArray( + [50, 340, 100, np.nan, np.nan], dims=("lat",), coords={"lat": da.lat} + ) + end = xr.DataArray( + [200, 20, np.nan, 200, np.nan], dims=("lat",), coords={"lat": da.lat} + ) + out = select_time(da, doy_bounds=(start, end)) + + np.testing.assert_array_equal( + out.notnull().sum("time"), + [151 * 2, 26 + 20 + 27, 266 + 267, 200 - 43 + 200, 365 - 43 + 366], + ) + + def test_select_time_doys_2D_temporal(self): + # YS-JUL periods: + # -2003: 44 to 181, 03-04: 182 to 182, 04-05: 183 to 181, 05-06: 182 to 181, 06-07: 182 to 183, 07-: 182 to 365 + da = self.series("2003-02-13", "2007-12-31", "default") + # Same 5 cases, but in YS-JUL + # -03 : no bounds for the first period, so no selection. + # 03-04 : normal : start < end + # 04-05 : over NYE : end < start + # 05-06 : end is nan (i.e. end of period) + # 06-07 : start is nan (i.e. 
start of period) + # 07- : both are nan (no drop) + time = xr.date_range("2003-07-01", freq="YS-JUL", periods=5) + start = xr.DataArray( + [50, 340, 100, np.nan, np.nan], dims=("time",), coords={"time": time} + ) + end = xr.DataArray( + [100, 20, np.nan, 200, np.nan], dims=("time",), coords={"time": time} + ) + out = select_time(da, doy_bounds=(start, end)) + + np.testing.assert_array_equal( + out.notnull().resample(time="YS-JUL").sum(), [0, 51, 47, 82, 19, 184] + ) + def test_select_time_dates(self): da = self.series("2003-02-13", "2004-11-01", "all_leap") da = da.where(da.time.dt.dayofyear != 92, drop=True) # no 04-01 From fccf1b5513617286ca5090b905c0014e3cfa4704 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Tue, 11 Feb 2025 14:12:14 -0500 Subject: [PATCH 11/18] Add changelog line --- CHANGELOG.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 5a371d480..38fa9de6a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,7 +4,7 @@ Changelog v0.55.0 (unreleased) -------------------- -Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`), Sascha Hofmann (:user:`saschahofmann`). +Contributors to this version: Juliette Lavoie (:user:`juliettelavoie`), Trevor James Smith (:user:`Zeitsperre`), Sascha Hofmann (:user:`saschahofmann`), Baptiste Hamon (:user:`baptistehamon`), Pascal Bourgault (:user:`aulemahal`). Announcements ^^^^^^^^^^^^^ @@ -23,6 +23,7 @@ New features and enhancements * `xclim` now tracks energy usage and carbon emissions ("last run", "average", and "total") during CI workflows using the `eco-ci-energy-estimation` GitHub Action. (:pull:`2046`). * ``xclim.testing.helpers.test_timeseries`` now accepts a `calendar` argument that is forwarded to ``xr.cftime_range``. (:pull:`2019`). * New ``xclim.indices.fao_allen98``, exporting the FAO-56 Penman-Monteith equation for potential evapotranspiration (:issue:`2004`, :pull:`2067`). 
+* Time selection in ``xclim.core.calendar.select_time`` and the ``**indexer`` argument of indicators now support day-of-year bounds given as DataArrays with spatial and/or temporal dimensions. (:issue:`1987`, :pull:`2055`). Internal changes ^^^^^^^^^^^^^^^^ From 19f3f4265a6fb11784403e07b3b9b10dd6d67daa Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Feb 2025 16:50:10 -0500 Subject: [PATCH 12/18] invert the problem, stop dropping when indexing --- CHANGELOG.rst | 3 +- src/xclim/core/missing.py | 59 +++++++++++++++++---------------------- tests/test_missing.py | 23 +++------------ tests/test_options.py | 2 +- 4 files changed, 32 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 2accaec30..2fcdae04a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -35,9 +35,10 @@ Internal changes * `sphinx-codeautolink` and `pygments` have been temporarily pinned due to breaking API changes. (:pull:`2030`). * Adjusted the ``TestOfficialYaml`` test to use a dynamic method for finding the installed location of `xclim`. (:pull:`2028`). * Adjusted two tests for better handling when running in Windows environments. (:pull:`2057`). -* Refactor of the ``xclim.core.missing`` module, usage of the ``Missing`` objects has been broken. (:pull:`2058`, :issue:`1820`, :issue:`2000`). +* Refactor of the ``xclim.core.missing`` module, usage of the ``Missing`` objects has been broken. (:pull:`2058`, :pull:`2055`, :pull:`2076`, :issue:`1820`, :issue:`2000`). - Objects are initialized with their options and then called with the data, input frequency, target frequency and indexer. - Subclasses receive non-resampled DataArray in their ``is_missing`` methods. + - Subclasses receive the array of valid timesteps ``valid`` instead of ``null``, the invalid ones. - ``MissingWMO`` now uses ``xclim.indices.helpers.resample_map`` which should greatly improve performance in a dask context. 
Bug fixes diff --git a/src/xclim/core/missing.py b/src/xclim/core/missing.py index 54f0bb7e3..f5c5ee197 100644 --- a/src/xclim/core/missing.py +++ b/src/xclim/core/missing.py @@ -203,9 +203,9 @@ def validate(**options): return True @staticmethod - def is_null(da: xr.DataArray, **indexer) -> xr.DataArray: + def is_valid(da: xr.DataArray, **indexer) -> xr.DataArray: r""" - Return a boolean array indicating which values are null. + Return a boolean array indicating which values are valid. Parameters ---------- @@ -219,27 +219,17 @@ def is_null(da: xr.DataArray, **indexer) -> xr.DataArray: Returns ------- xr.DataArray - Boolean array indicating which values are null. - - Raises - ------ - ValueError - If no data is available for the selected period. + Boolean array indicating which values are valid. """ - indexer.update({"drop": True}) selected = select_time(da, **indexer) - if selected.time.size == 0: - raise ValueError("No data for selected period.") - - null = selected.isnull() - return null + return selected.notnull() def _validate_src_timestep(self, src_timestep): return True def is_missing( self, - null: xr.DataArray, + valid: xr.DataArray, count: xr.DataArray, freq: str | None, ) -> xr.DataArray: @@ -250,8 +240,8 @@ def is_missing( Parameters ---------- - null : DataArray - Boolean array of invalid values (that has already been indexed). + valid : DataArray + Boolean array of valid values (that has already been indexed). count : DataArray Indexer-aware integer array of number of expected elements at the resampling frequency. 
freq : str or None @@ -310,8 +300,8 @@ def __call__( ) count = expected_count(da.time, freq=freq, src_timestep=src_timestep, **indexer) - null = self.is_null(da, **indexer) - return self.is_missing(null, count, freq) + valid = self.is_valid(da, **indexer) + return self.is_missing(valid, count, freq) def __repr__(self): opt_str = ", ".join([f"{k}={v}" for k, v in self.options.items()]) @@ -332,13 +322,12 @@ def __init__(self): super().__init__() def is_missing( - self, null: xr.DataArray, count: xr.DataArray, freq: str | None + self, valid: xr.DataArray, count: xr.DataArray, freq: str | None ) -> xr.DataArray: if freq is not None: - null = null.resample(time=freq) - cond0 = null.count(dim="time") != count # Check total number of days - cond1 = null.sum(dim="time") > 0 # Check if any is missing - return cond0 | cond1 + valid = valid.resample(time=freq) + # The number of valid values should fit the expected count. + return valid.sum(dim="time") != count # TODO: Make coarser method controllable. 
@@ -446,21 +435,23 @@ def _validate_src_timestep(self, src_timestep): return src_timestep == "D" def is_missing( - self, null: xr.DataArray, count: xr.DataArray, freq: str + self, valid: xr.DataArray, count: xr.DataArray, freq: str ) -> xr.DataArray: from xclim.indices import run_length as rl from xclim.indices.helpers import resample_map - nullr = null.resample(time=freq) + validr = valid.resample(time=freq) # Total number of missing or invalid days - missing_days = (count - nullr.count(dim="time")) + nullr.sum(dim="time") + missing_days = count - validr.sum(dim="time") # Check if more than threshold is missing cond1 = missing_days >= self.options["nm"] # Check for consecutive invalid values # FIXME: This does not take holes in consideration - longest_run = resample_map(null, "time", freq, rl.longest_run, map_blocks=True) + longest_run = resample_map( + ~valid, "time", freq, rl.longest_run, map_blocks=True + ) cond2 = longest_run >= self.options["nc"] return cond1 | cond2 @@ -490,13 +481,14 @@ def validate(tolerance: float, subfreq: str | None = None): return 0 <= tolerance <= 1 def is_missing( - self, null: xr.DataArray, count: xr.DataArray, freq: str | None + self, valid: xr.DataArray, count: xr.DataArray, freq: str | None ) -> xr.DataArray: if freq is not None: - null = null.resample(time=freq) + valid = valid.resample(time=freq) - n = count - null.count(dim="time").fillna(0) + null.sum(dim="time").fillna(0) - return n / count >= self.options["tolerance"] + # Total number of missing or invalid days + missing_days = (count - valid.sum(dim="time")).fillna(count) + return (missing_days / count) >= self.options["tolerance"] @register_missing_method("at_least_n") @@ -522,9 +514,8 @@ def validate(n: int, subfreq: str | None = None): return n > 0 def is_missing( - self, null: xr.DataArray, count: xr.DataArray, freq: str | None + self, valid: xr.DataArray, count: xr.DataArray, freq: str | None ) -> xr.DataArray: - valid = ~null if freq is not None: valid = 
valid.resample(time=freq) nvalid = valid.sum(dim="time") diff --git a/tests/test_missing.py b/tests/test_missing.py index 1eb389db4..aedbf8b7a 100644 --- a/tests/test_missing.py +++ b/tests/test_missing.py @@ -105,9 +105,6 @@ def test_month(self, tasmin_series): miss = missing.missing_any(ts, freq="YS", month=8) np.testing.assert_equal(miss, [True]) - with pytest.raises(ValueError, match=r"No data for selected period."): - missing.missing_any(ts, freq="YS", month=1) - miss = missing.missing_any(ts, freq="YS", month=[7, 8]) np.testing.assert_equal(miss, [True]) @@ -117,17 +114,14 @@ def test_month(self, tasmin_series): @pytest.mark.parametrize("calendar", ("proleptic_gregorian", "noleap", "360_day")) def test_season(self, tasmin_series, calendar): - ts = tasmin_series(np.zeros(360)) + ts = tasmin_series(np.zeros(360), start="2000-01-01") ts = ts.convert_calendar(calendar, missing=0, align_on="date") miss = missing.missing_any(ts, freq="YS", season="MAM") - np.testing.assert_equal(miss, [False]) + np.testing.assert_array_equal(miss, [False]) - miss = missing.missing_any(ts, freq="YS", season="JJA") - np.testing.assert_array_equal(miss, [True, True]) - - miss = missing.missing_any(ts, freq="YS", season="SON") - np.testing.assert_equal(miss, [False]) + miss = missing.missing_any(ts, freq="YS", season="DJF") + np.testing.assert_array_equal(miss, [True]) def test_no_freq(self, tasmin_series): ts = tasmin_series(np.zeros(360)) @@ -154,9 +148,6 @@ def test_hydro(self, open_dataset): np.testing.assert_array_equal(miss[:-1], False) np.testing.assert_array_equal(miss[-1], True) - miss = missing.missing_any(ds.q_sim, freq="YS", season="JJA") - np.testing.assert_array_equal(miss, False) - def test_hourly(self, pr_hr_series): a = np.arange(2.0 * 32 * 24) a[5:10] = np.nan @@ -245,12 +236,6 @@ def test_hourly(self, pr_hr_series): out = missing.at_least_n_valid(pr, freq="MS", n=25 * 24) np.testing.assert_array_equal(out, [True, False, True]) - def test_missing_period(self, 
tas_series): - tas = tas_series(np.ones(366), start="2000-01-01") - tas = tas.sel(time=tas.time.dt.month.isin([1, 2, 3, 4, 12])) - out = missing.missing_pct(tas, freq="MS", tolerance=0.9, src_timestep="D") - np.testing.assert_array_equal(out, [False] * 4 + [True] * 7 + [False]) - class TestHourly: """Test that missing algorithms also work on resampling from hourly to daily.""" diff --git a/tests/test_options.py b/tests/test_options.py index e7fa6e1da..d2fd930cf 100644 --- a/tests/test_options.py +++ b/tests/test_options.py @@ -69,7 +69,7 @@ def test_set_options_invalid(option, value): def test_register_missing_method(): @register_missing_method("test") class MissingTest(MissingBase): - def is_missing(self, null, count, a_param=2): + def is_missing(self, null, count, freq): return True @staticmethod From 9fb56a2ac783f9b41689f52f4bb9df9c7bfbfddc Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Feb 2025 16:51:49 -0500 Subject: [PATCH 13/18] upd nb example --- docs/notebooks/customize.ipynb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/notebooks/customize.ipynb b/docs/notebooks/customize.ipynb index cdb4d93bc..6d1abf138 100644 --- a/docs/notebooks/customize.ipynb +++ b/docs/notebooks/customize.ipynb @@ -157,7 +157,7 @@ "\n", "Finally, it is possible for advanced users to register their own methods. Xclim's missing methods are in fact class-based. To create a custom missing class, one should implement a subclass of `xclim.core.checks.MissingBase` and override at least the `is_missing` method. 
This method should take the following arguments:\n", "\n", - "- `null`, a `DataArray` of the mask of invalid values in the input data array (with the same time coordinate as the raw data).\n", + "- `valid`, a `DataArray` of the mask of valid values in the input data array (with the same time coordinate as the raw data).\n", "- `count`, `DataArray` of the number of days in each resampled periods\n", "- `freq`, the resampling frequency.\n", "\n", @@ -185,8 +185,9 @@ " def __init__(self, max_n: int = 5):\n", " super().__init__(max_n=max_n)\n", "\n", - " def is_missing(self, null, count, freq):\n", + " def is_missing(self, valid, count, freq):\n", " \"\"\"Return a boolean mask where True values are for elements that are considered missing and masked on the output.\"\"\"\n", + " null = ~valid\n", " return (\n", " null.resample(time=freq).map(longest_run, dim=\"time\")\n", " >= self.options[\"max_n\"]\n", From 3446d36f265f4ec4067092cb93611212b83736c3 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Wed, 12 Feb 2025 17:11:19 -0500 Subject: [PATCH 14/18] add simple tests for DA indexing --- tests/test_indicators.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/test_indicators.py b/tests/test_indicators.py index 7c4295fdc..7af88b5b1 100644 --- a/tests/test_indicators.py +++ b/tests/test_indicators.py @@ -860,6 +860,43 @@ def test_resampling_indicator_with_indexing(tas_series): np.testing.assert_allclose(out, [32, 33]) +def test_indicator_indexing_doy_bounds_spatial(tasmin_series): + da = tasmin_series(np.ones(730), start="2005-01-01", units="°C").expand_dims( + lat=[0, 10, 15, 20, 25] + ) + + start = xr.DataArray( + [50, 340, 100, np.nan, np.nan], dims=("lat",), coords={"lat": da.lat} + ) + end = xr.DataArray( + [200, 20, np.nan, 200, np.nan], dims=("lat",), coords={"lat": da.lat} + ) + out = atmos.tn_days_above(da, thresh="0 °C", doy_bounds=(start, end)) + + np.testing.assert_array_equal( + out, + [[151.0, 151.0], 
[46.0, 46.0], [266.0, 266.0], [200.0, 200.0], [365.0, 365.0]], + ) + + +def test_indicator_indexing_doy_bounds_temporal(tasmin_series): + da = tasmin_series(np.ones(365 * 5 + 1), start="2005-01-01", units="°C") + + time = xr.date_range("2005-01-01", freq="YS", periods=5) + start = xr.DataArray( + [50, 340, 100, np.nan, np.nan], dims=("time",), coords={"time": time} + ) + end = xr.DataArray( + [200, 20, np.nan, 200, np.nan], dims=("time",), coords={"time": time} + ) + out = atmos.tn_days_above(da, thresh="0 °C", doy_bounds=(start, end)) + + # 340, 20 is an invalid indexer for freq YS. + # such cases return an entirely masked array + # No values are missing as there are no values to count + np.testing.assert_array_equal(out, [151, 0, 266, 200, 365]) + + def test_all_inputs_known(): var_and_inds = list_input_variables() known_vars = ( From ac656b085e43b79df801372bd040d7e3f0daed8b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Feb 2025 13:26:44 +0000 Subject: [PATCH 15/18] Bump the actions group with 2 updates Bumps the actions group with 2 updates: [step-security/harden-runner](https://github.com/step-security/harden-runner) and [actions/create-github-app-token](https://github.com/actions/create-github-app-token). 
Updates `step-security/harden-runner` from 2.10.4 to 2.11.0 - [Release notes](https://github.com/step-security/harden-runner/releases) - [Commits](https://github.com/step-security/harden-runner/compare/cb605e52c26070c328afc4562f0b4ada7618a84e...4d991eb9b905ef189e4c376166672c3f2f230481) Updates `actions/create-github-app-token` from 1.11.3 to 1.11.5 - [Release notes](https://github.com/actions/create-github-app-token/releases) - [Commits](https://github.com/actions/create-github-app-token/compare/67e27a7eb7db372a1c61a7f9bdab8699e9ee57f7...0d564482f06ca65fa9e77e2510873638c82206f2) --- updated-dependencies: - dependency-name: step-security/harden-runner dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: actions/create-github-app-token dependency-type: direct:production update-type: version-update:semver-patch dependency-group: actions ... Signed-off-by: dependabot[bot] --- .github/workflows/add-to-project.yml | 4 ++-- .github/workflows/bump-version.yml | 4 ++-- .github/workflows/cache-cleaner.yml | 2 +- .github/workflows/codeql.yml | 2 +- .github/workflows/dependency-review.yml | 2 +- .github/workflows/first-pull-request.yml | 2 +- .github/workflows/label-on-approval.yml | 4 ++-- .github/workflows/label.yml | 2 +- .github/workflows/main.yml | 10 +++++----- .github/workflows/publish-mastodon.yml | 2 +- .github/workflows/publish-pypi.yml | 2 +- .github/workflows/scorecard.yml | 2 +- .github/workflows/tag-testpypi.yml | 2 +- .github/workflows/testdata-version.yml | 2 +- .github/workflows/upstream.yml | 2 +- .github/workflows/workflow-warning.yml | 2 +- 16 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/add-to-project.yml b/.github/workflows/add-to-project.yml index 34d716476..0c51fad3f 100644 --- a/.github/workflows/add-to-project.yml +++ b/.github/workflows/add-to-project.yml @@ -16,7 +16,7 @@ jobs: repository-projects: write steps: - name: Harden Runner - uses: 
step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block @@ -25,7 +25,7 @@ jobs: - name: Generate App Token id: token_generator - uses: actions/create-github-app-token@67e27a7eb7db372a1c61a7f9bdab8699e9ee57f7 # v1.11.3 + uses: actions/create-github-app-token@0d564482f06ca65fa9e77e2510873638c82206f2 # v1.11.5 with: app-id: ${{ secrets.OURANOS_HELPER_BOT_ID }} private-key: ${{ secrets.OURANOS_HELPER_BOT_KEY }} diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml index 9d53acffb..75e457cdf 100644 --- a/.github/workflows/bump-version.yml +++ b/.github/workflows/bump-version.yml @@ -37,7 +37,7 @@ jobs: actions: read steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block @@ -48,7 +48,7 @@ jobs: pypi.org:443 - name: Generate App Token id: token_generator - uses: actions/create-github-app-token@67e27a7eb7db372a1c61a7f9bdab8699e9ee57f7 # v1.11.3 + uses: actions/create-github-app-token@0d564482f06ca65fa9e77e2510873638c82206f2 # v1.11.5 with: app-id: ${{ secrets.OURANOS_HELPER_BOT_ID }} private-key: ${{ secrets.OURANOS_HELPER_BOT_KEY }} diff --git a/.github/workflows/cache-cleaner.yml b/.github/workflows/cache-cleaner.yml index 65b67bef3..daf16c4fb 100644 --- a/.github/workflows/cache-cleaner.yml +++ b/.github/workflows/cache-cleaner.yml @@ -15,7 +15,7 @@ jobs: actions: write steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 
b927f6cc2..17fee8326 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -40,7 +40,7 @@ jobs: - 'python' steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 0b895bcdd..3b9442a34 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/first-pull-request.yml b/.github/workflows/first-pull-request.yml index 9f12c0db5..49a79f15e 100644 --- a/.github/workflows/first-pull-request.yml +++ b/.github/workflows/first-pull-request.yml @@ -16,7 +16,7 @@ jobs: pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/label-on-approval.yml b/.github/workflows/label-on-approval.yml index 48d31dda6..dfc3e8219 100644 --- a/.github/workflows/label-on-approval.yml +++ b/.github/workflows/label-on-approval.yml @@ -24,7 +24,7 @@ jobs: pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block @@ -51,7 +51,7 @@ jobs: pull-requests: write steps: - name: Harden 
Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/label.yml b/.github/workflows/label.yml index b423ab2a7..9a27215a2 100644 --- a/.github/workflows/label.yml +++ b/.github/workflows/label.yml @@ -27,7 +27,7 @@ jobs: pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index acdf39466..5ca3856de 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -40,7 +40,7 @@ jobs: (github.event_name == 'push') steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block @@ -115,7 +115,7 @@ jobs: testdata-cache: [ '~/.cache/xclim-testdata' ] steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block @@ -246,7 +246,7 @@ jobs: tox-env: doctests steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: egress-policy: block allowed-endpoints: > @@ -353,7 +353,7 @@ jobs: shell: bash -l {0} steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: 
step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block @@ -443,7 +443,7 @@ jobs: pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/publish-mastodon.yml b/.github/workflows/publish-mastodon.yml index 9ba0b307a..143045337 100644 --- a/.github/workflows/publish-mastodon.yml +++ b/.github/workflows/publish-mastodon.yml @@ -15,7 +15,7 @@ jobs: environment: production steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index e676917a3..63c0a70ef 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 99569904f..148ce02b9 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -32,7 +32,7 @@ jobs: steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: egress-policy: audit diff --git a/.github/workflows/tag-testpypi.yml b/.github/workflows/tag-testpypi.yml index 635b816b3..e922f2ef4 100644 --- 
a/.github/workflows/tag-testpypi.yml +++ b/.github/workflows/tag-testpypi.yml @@ -18,7 +18,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/testdata-version.yml b/.github/workflows/testdata-version.yml index fe71415e2..619e485ac 100644 --- a/.github/workflows/testdata-version.yml +++ b/.github/workflows/testdata-version.yml @@ -22,7 +22,7 @@ jobs: pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/upstream.yml b/.github/workflows/upstream.yml index d7a02493b..d7c79d05d 100644 --- a/.github/workflows/upstream.yml +++ b/.github/workflows/upstream.yml @@ -39,7 +39,7 @@ jobs: shell: bash -l {0} steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block diff --git a/.github/workflows/workflow-warning.yml b/.github/workflows/workflow-warning.yml index 14d5f8269..3760167c3 100644 --- a/.github/workflows/workflow-warning.yml +++ b/.github/workflows/workflow-warning.yml @@ -26,7 +26,7 @@ jobs: pull-requests: write steps: - name: Harden Runner - uses: step-security/harden-runner@cb605e52c26070c328afc4562f0b4ada7618a84e # v2.10.4 + uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0 with: disable-sudo: true egress-policy: block From 38415e9f914eec38fab1e3cf8b5fbc0a4f9a0f28 Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 17 Feb 2025 10:20:21 
-0500 Subject: [PATCH 16/18] add tests with include_bounds --- tests/test_generic.py | 44 +++++++++++++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/tests/test_generic.py b/tests/test_generic.py index 203cf31df..3b4524e33 100644 --- a/tests/test_generic.py +++ b/tests/test_generic.py @@ -624,7 +624,8 @@ def test_select_time_doys(self): ) xr.testing.assert_equal(out, exp) - def test_select_time_doys_2D_spatial(self): + @pytest.mark.parametrize("include_bounds", [True, False]) + def test_select_time_doys_2D_spatial(self, include_bounds): # first doy of da is 44, last is 366 da = self.series("2003-02-13", "2004-12-31", "default").expand_dims( lat=[0, 10, 15, 20, 25] @@ -641,14 +642,23 @@ def test_select_time_doys_2D_spatial(self): end = xr.DataArray( [200, 20, np.nan, 200, np.nan], dims=("lat",), coords={"lat": da.lat} ) - out = select_time(da, doy_bounds=(start, end)) - - np.testing.assert_array_equal( - out.notnull().sum("time"), - [151 * 2, 26 + 20 + 27, 266 + 267, 200 - 43 + 200, 365 - 43 + 366], - ) - - def test_select_time_doys_2D_temporal(self): + out = select_time(da, doy_bounds=(start, end), include_bounds=include_bounds) + + exp = [151 * 2, 26 + 20 + 27, 266 + 267, 200 - 43 + 200, 365 - 43 + 366] + if not include_bounds: + exp[0] = exp[0] - 4 # 2 years * 2 + exp[1] = ( + exp[1] - 3 + ) # 2 on 1st year, 1 on 2nd (end bnd is after end of data) + exp[2] = ( + exp[2] - 2 + ) # "Open" bound so always included, 1 real bnd on each year + exp[3] = exp[3] - 2 # Same + # No real bound on exp[4] + np.testing.assert_array_equal(out.notnull().sum("time"), exp) + + @pytest.mark.parametrize("include_bounds", [True, False]) + def test_select_time_doys_2D_temporal(self, include_bounds): # YS-JUL periods: # -2003: 44 to 181, 03-04: 182 to 182, 04-05: 183 to 181, 05-06: 182 to 181, 06-07: 182 to 183, 07-: 182 to 365 da = self.series("2003-02-13", "2007-12-31", "default") @@ -666,11 +676,17 @@ def 
test_select_time_doys_2D_temporal(self): end = xr.DataArray( [100, 20, np.nan, 200, np.nan], dims=("time",), coords={"time": time} ) - out = select_time(da, doy_bounds=(start, end)) - - np.testing.assert_array_equal( - out.notnull().resample(time="YS-JUL").sum(), [0, 51, 47, 82, 19, 184] - ) + out = select_time(da, doy_bounds=(start, end), include_bounds=include_bounds) + + exp = [0, 51, 47, 82, 19, 184] + if not include_bounds: + # No selection on year 1 + exp[1] = exp[1] - 2 # 2 real bounds + exp[2] = exp[2] - 2 # 2 real bounds + exp[3] = exp[3] - 1 # 1 real bound + exp[4] = exp[4] - 1 # Same + # No real bounds on year 6 + np.testing.assert_array_equal(out.notnull().resample(time="YS-JUL").sum(), exp) def test_select_time_dates(self): da = self.series("2003-02-13", "2004-11-01", "all_leap") From cc767b565c34c45d2ec6c67c2a806a4c9cceeeee Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Mon, 17 Feb 2025 10:56:15 -0500 Subject: [PATCH 17/18] Error on drop True and array-like doy-bounds --- src/xclim/core/calendar.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index b7e23c6b8..6883198e1 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1262,7 +1262,8 @@ def select_time( da : xr.DataArray or xr.Dataset Input data. drop : bool - Whether to drop elements outside the period of interest or to simply mask them (default). + Whether to drop elements outside the period of interest (True) or to simply mask them (False, default). + This option is incompatible with passing array-like doy_bounds. season : str or sequence of str, optional One or more of 'DJF', 'MAM', 'JJA' and 'SON'. 
month : int or sequence of int, optional @@ -1326,6 +1327,14 @@ def select_time( mask = da.time.dt.month.isin(month) elif doy_bounds is not None: + if ( + not (isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int)) + and drop + ): + # At least one of those is an array, this drop won't work + raise ValueError( + "Passing array-like doy bounds is incompatible with drop=True." + ) mask = mask_between_doys(da, doy_bounds, include_bounds) elif date_bounds is not None: From eb2ef2e4fc537a5e51f2f9ac60ffa3911251d685 Mon Sep 17 00:00:00 2001 From: Ouranos Helper Bot Date: Mon, 17 Feb 2025 16:20:48 +0000 Subject: [PATCH 18/18] =?UTF-8?q?Bump=20version:=200.54.1-dev.13=20?= =?UTF-8?q?=E2=86=92=200.54.1-dev.14?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ouranos Helper Bot --- pyproject.toml | 2 +- src/xclim/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b344ba32d..1da33d78c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,7 +137,7 @@ target-version = [ ] [tool.bumpversion] -current_version = "0.54.1-dev.13" +current_version = "0.54.1-dev.14" commit = true commit_args = "--no-verify --signoff" tag = false diff --git a/src/xclim/__init__.py b/src/xclim/__init__.py index 9becf7526..ec4342406 100644 --- a/src/xclim/__init__.py +++ b/src/xclim/__init__.py @@ -13,7 +13,7 @@ __author__ = """Travis Logan""" __email__ = "logan.travis@ouranos.ca" -__version__ = "0.54.1-dev.13" +__version__ = "0.54.1-dev.14" with _resources.as_file(_resources.files("xclim.data")) as _module_data: