diff --git a/docs/references.bib b/docs/references.bib index ef78be3a7..e7dd2e3cf 100644 --- a/docs/references.bib +++ b/docs/references.bib @@ -2236,6 +2236,32 @@ @article{knol_1989 number = "1", } +@article{vicente-serrano_2012, + author = {Sergio M. Vicente-Serrano and Juan I. López-Moreno and Santiago Beguería and Jorge Lorenzo-Lacruz and Cesar Azorin-Molina and Enrique Morán-Tejeda}, + title = {Accurate Computation of a Streamflow Drought Index}, + journal = {Journal of Hydrologic Engineering}, + volume = {17}, + number = {2}, + pages = {318-332}, + year = {2012}, + doi = {10.1061/(ASCE)HE.1943-5584.0000433}, + URL = {https://ascelibrary.org/doi/abs/10.1061/%28ASCE%29HE.1943-5584.0000433}, + eprint = {https://ascelibrary.org/doi/pdf/10.1061/%28ASCE%29HE.1943-5584.0000433}, + abstract = "In this study, the authors investigated an approach to calculate the standardized streamflow index (SSI), which allows accurate spatial and temporal comparison of the hydrological conditions of a stream or set of streams. For this purpose, the capability of six three-parameter distributions (lognormal, Pearson Type III, log-logistic, general extreme value, generalized Pareto, and Weibull) and two different approaches to select the most suitable distribution the best monthly fit (BMF) and the minimum orthogonal distance (MD), were tested by using a monthly streamflow data set for the Ebro Basin (Spain). This large Mediterranean basin is characterized by high variability in the magnitude of streamflows and in seasonal regimes. The results show that the most commonly used probability distributions for flow frequency analysis provided good fits to the streamflow series. Thus, the visual inspection of the L-moment diagrams and the results of the Kolmogorov-Smirnov test did not enable the selection of a single optimum probability distribution. However, no single probability distribution for all the series was suitable for obtaining a robust standardized streamflow series because each of the distributions had one or more limitations. The BMF and MD approaches improved the results in the expected average, standard deviation, and the frequencies of extreme events of the SSI series in relation to the selection of a unique distribution for each station. The BMF and MD approaches involved using different probability distributions for each gauging station and month of the year to calculate the SSI. Both approaches are easy to apply and they provide very similar results in the quality of the obtained hydrological drought indexes. The proposed procedures are very flexible for analyses involving contrasting hydrological regimes and flow characteristics." +} + +@article{bloomfield_2013, + AUTHOR = {Bloomfield, J. P. and Marchant, B. P.}, + TITLE = {Analysis of groundwater drought building on the standardised precipitation index approach}, + JOURNAL = {Hydrology and Earth System Sciences}, + VOLUME = {17}, + YEAR = {2013}, + NUMBER = {12}, + PAGES = {4769--4787}, + URL = {https://hess.copernicus.org/articles/17/4769/2013/}, + DOI = {10.5194/hess-17-4769-2013} +} + @article{luedeling_chill_2009, title = {Climate change impacts on winter chill for temperate fruit and nut production: A review}, journal = {Scientia Horticulturae}, diff --git a/src/xclim/core/indicator.py b/src/xclim/core/indicator.py index 8e0568232..88cef9654 100644 --- a/src/xclim/core/indicator.py +++ b/src/xclim/core/indicator.py @@ -1939,3 +1939,10 @@ def _merge_attrs(dbase, dextra, attr, sep): load_locale(loc_dict, locale) return mod + + +class StandardizedIndexes(ResamplingIndicator): + """Resampling but flexible inputs indicators.""" + + src_freq = ["D", "MS"] + context = "hydro" diff --git a/src/xclim/data/fr.json b/src/xclim/data/fr.json index 8efa0c85b..d2fdc32e6 100644 --- a/src/xclim/data/fr.json +++ b/src/xclim/data/fr.json @@ -269,6 +269,12 @@ "title": "Précipitation liquide accumulée totale", "abstract": "Précipitation liquide accumulée totale. Les précipitations sont considérées liquides lorsque la température quotidienne moyenne est au-dessus de 0°C." }, + "SGI": { + "long_name": "Indice d'eau souterraine standardisé (SGI)", + "description": "Niveau de l'eau souterraine sur une fenêtre mobile de {window} {freq:nom}, normalisé telle que la moyenne du SGI est 0 pour les données de calibration. L'unité de la fenêtre 'X' est l'unité de temps déterminée par la fréquence de rééchantillonage,", + "title": "Indice d'eau souterraine standardisé (SGI)", + "abstract": "Niveau de l'eau souterraine sur une fenêtre mobile, normalisée telle que la moyenne du SGI est 0 pour les données de calibration. L'unité de la fenêtre est l'unité de temps déterminée par la fréquence de rééchantillonnage." + }, "SOLIDPRCPTOT": { "long_name": "Précipitation totale lorsque la température moyenne est en dessous ou égale à {thresh}", "description": "Précipitation solide totale {freq:f}, estimée comme la précipitation lorsque la température moyenne est en dessous ou égale à {thresh}.", @@ -287,6 +293,12 @@ "title": "Indice de précipitation évapotranspiration standardisé (SPEI)", "abstract": "Budget d'eau (précipitation - évapotranspiration) sur une fenêtre mobile, normalisé tel que la moyenne du SPEI est 0 pour les données de calibration. L'unité de la fenêtre est l'unité de temps déterminée par la fréquence de rééchantillonnage." }, + "SSI": { + "long_name": "Indice d'écoulement standardisé (SSI)", + "description": "Écoulement sur une fenêtre mobile de {window} {freq:nom}, normalisée telle que la moyenne du SSI est 0 pour les données de calibration. L'unité de la fenêtre 'X' est l'unité de temps déterminée par la fréquence de rééchantillonage,", + "title": "Indice d'écoulement standardisé (SSI)", + "abstract": "Écoulement sur une fenêtre mobile, normalisée telle que la moyenne du SSI est 0 pour les données de calibration. L'unité de la fenêtre est l'unité de temps déterminée par la fréquence de rééchantillonnage." + }, "DC": { "long_name": "Indice de sécheresse", "description": "Code numérique estimant la teneur en humidité moyenne des couches organiques.", diff --git a/src/xclim/data/variables.yml b/src/xclim/data/variables.yml index 71a7b7dd0..4ba346d95 100644 --- a/src/xclim/data/variables.yml +++ b/src/xclim/data/variables.yml @@ -30,6 +30,11 @@ variables: standard_name: water_potential_evapotranspiration_flux data_flags: - negative_accumulation_values: + gwl: + canonical_units: m + description: Groundwater level. + dimensions: "[length]" + hurs: canonical_units: '%' cell_methods: "time: mean" diff --git a/src/xclim/indicators/atmos/_precip.py b/src/xclim/indicators/atmos/_precip.py index cd83ad218..8fdfc7077 100644 --- a/src/xclim/indicators/atmos/_precip.py +++ b/src/xclim/indicators/atmos/_precip.py @@ -10,8 +10,8 @@ Daily, Hourly, Indicator, - ResamplingIndicator, ResamplingIndicatorWithIndexing, + StandardizedIndexes, ) from xclim.core.utils import InputKind @@ -115,13 +115,6 @@ def cfcheck(pr: DataArray, tas: DataArray): cfchecks.check_valid(tas, "standard_name", "air_temperature") -class StandardizedIndexes(ResamplingIndicator): - """Resampling but flexible inputs indicators.""" - - src_freq = ["D", "MS"] - context = "hydro" - - class HrPrecip(Hourly): """Indicator involving hourly pr series.""" @@ -372,6 +365,7 @@ class HrPrecip(Hourly): ) standardized_precipitation_index = StandardizedIndexes( + realm="atmos", title="Standardized Precipitation Index (SPI)", identifier="spi", units="", @@ -387,6 +381,7 @@ class HrPrecip(Hourly): ) standardized_precipitation_evapotranspiration_index = StandardizedIndexes( + realm="atmos", title="Standardized Precipitation Evapotranspiration Index (SPEI)", identifier="spei", units="", diff --git a/src/xclim/indicators/land/_streamflow.py b/src/xclim/indicators/land/_streamflow.py index 1dc5b0786..08b1b1696 100644 --- a/src/xclim/indicators/land/_streamflow.py +++ b/src/xclim/indicators/land/_streamflow.py @@ -8,6 +8,7 @@ from xclim.core.indicator import ( ReducingIndicator, ResamplingIndicator, + StandardizedIndexes, ) from xclim.core.units import declare_units from xclim.indices import ( @@ -17,6 +18,8 @@ high_flow_frequency, low_flow_frequency, rb_flashiness_index, + standardized_groundwater_index, + standardized_streamflow_index, ) __all__ = [ @@ -27,6 +30,8 @@ "high_flow_frequency", "low_flow_frequency", "rb_flashiness_index", + "standardized_groundwater_index", + "standardized_streamflow_index", ] @@ -97,6 +102,7 @@ def cfcheck(q: DataArray): parameters={"op": generic.doymin, "out_units": None}, ) + flow_index = ReducingIndicator( realm="land", context="hydro", @@ -130,3 +136,36 @@ def cfcheck(q: DataArray): units="days", compute=low_flow_frequency, ) + +standardized_streamflow_index = StandardizedIndexes( + realm="land", + title="Standardized Streamflow Index (SSI)", + identifier="ssi", + units="", + standard_name="ssi", + long_name="Standardized Streamflow Index (SSI)", + description="Streamflow over a moving {window}-X window, normalized such that SSI averages to 0 for " + "calibration data. The window unit `X` is the minimal time period defined by resampling frequency {freq}.", + abstract="Streamflow over a moving window, normalized such that SSI averages to 0 for the calibration data. " + "The window unit `X` is the minimal time period defined by the resampling frequency.", + cell_methods="", + keywords="streamflow", + compute=standardized_streamflow_index, +) + + +standardized_groundwater_index = StandardizedIndexes( + realm="land", + title="Standardized Groundwater Index (SGI)", + identifier="sgi", + units="", + standard_name="sgi", + long_name="Standardized Groundwater Index (SGI)", + description="Groundwater over a moving {window}-X window, normalized such that SGI averages to 0 for " + "calibration data. The window unit `X` is the minimal time period defined by resampling frequency {freq}.", + abstract="Groundwater over a moving window, normalized such that SGI averages to 0 for the calibration data. " + "The window unit `X` is the minimal time period defined by the resampling frequency.", + cell_methods="", + keywords="groundwater", + compute=standardized_groundwater_index, +) diff --git a/src/xclim/indices/_hydrology.py b/src/xclim/indices/_hydrology.py index 13758e62f..4c5346481 100644 --- a/src/xclim/indices/_hydrology.py +++ b/src/xclim/indices/_hydrology.py @@ -4,11 +4,14 @@ import numpy as np import xarray as xr +from scipy.stats import rv_continuous +from xclim.core._types import DateStr, Quantified from xclim.core.calendar import get_calendar from xclim.core.missing import at_least_n_valid from xclim.core.units import declare_units, rate2amount, to_agg_units from xclim.indices.generic import threshold_count +from xclim.indices.stats import standardized_index from . import generic @@ -24,6 +27,8 @@ "snow_melt_we_max", "snw_max", "snw_max_doy", + "standardized_groundwater_index", + "standardized_streamflow_index", ] @@ -109,6 +114,136 @@ def rb_flashiness_index(q: xr.DataArray, freq: str = "YS") -> xr.DataArray: return out +@declare_units( + q="[discharge]", + params="[]", +) +def standardized_streamflow_index( + q: xr.DataArray, + freq: str | None = "MS", + window: int = 1, + dist: str | rv_continuous = "genextreme", + method: str = "ML", + fitkwargs: dict | None = None, + cal_start: DateStr | None = None, + cal_end: DateStr | None = None, + params: Quantified | None = None, + **indexer, +) -> xr.DataArray: + r""" + Standardized Streamflow Index (SSI). + + Parameters + ---------- + q : xarray.DataArray + Rate of river discharge. + freq : str, optional + Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling + has already been applied input dataset and will skip the resampling step. + window : int + Averaging window length relative to the resampling frequency. For example, if `freq="MS"`, + i.e. a monthly resampling, the window is an integer number of months. + dist : {"genextreme", "fisk"} or `rv_continuous` function + Name of the univariate distribution, or a callable `rv_continuous` (see :py:mod:`scipy.stats`). + method : {"APP", "ML", "PWM"} + Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method + uses a deterministic function that does not involve any optimization. + `PWM` should be used with a `lmoments3` distribution. + fitkwargs : dict, optional + Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). + cal_start : DateStr, optional + Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period begins at the start of the input dataset. + cal_end : DateStr, optional + End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period finishes at the end of the input dataset. + params : xarray.DataArray, optional + Fit parameters. + The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. + The output can be given here as input, and it overrides other options. + **indexer : Indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + + Returns + ------- + xarray.DataArray, [unitless] + Standardized Streamflow Index. + + See Also + -------- + xclim.indices._agro.standardized_precipitation_index : Standardized Precipitation Index. + xclim.indices.stats.standardized_index : Standardized Index. + xclim.indices.stats.standardized_index_fit_params : Standardized Index Fit Params. + + Notes + ----- + * N-month SSI / N-day SSI is determined by choosing the `window = N` and the appropriate frequency `freq`. + * Supported statistical distributions are: ["genextreme", "fisk"], where "fisk" is scipy's implementation of + a log-logistic distribution. + * If `params` is provided, it overrides the `cal_start`, `cal_end`, `freq`, `window`, `dist`, and `method` options. + * "APP" method only supports two-parameter distributions. Parameter `loc` needs to be fixed to use method "APP". + * The standardized index is bounded by ±8.21. 8.21 is the largest standardized index as constrained by the + float64 precision in the inversion to the normal distribution. + + References + ---------- + :cite:cts:`vicente-serrano_2012` + + Examples + -------- + >>> from datetime import datetime + >>> from xclim.indices import standardized_streamflow_index + >>> ds = xr.open_dataset(path_to_q_file) + >>> q = ds.q + >>> cal_start, cal_end = "2006-05-01", "2008-06-01" + >>> ssi_3 = standardized_streamflow_index( + ... q, + ... freq="MS", + ... window=3, + ... dist="genextreme", + ... method="ML", + ... cal_start=cal_start, + ... cal_end=cal_end, + ... ) # Computing SSI-3 months using a GEV distribution for the fit + >>> # Fitting parameters can also be obtained first, then reused as input. + >>> from xclim.indices.stats import standardized_index_fit_params + >>> params = standardized_index_fit_params( + ... q.sel(time=slice(cal_start, cal_end)), + ... freq="MS", + ... window=3, + ... dist="genextreme", + ... method="ML", + ... ) # First getting params + >>> ssi_3 = standardized_streamflow_index(q, params=params) + """ + fitkwargs = fitkwargs or {} + dist_methods = {"genextreme": ["ML", "APP"], "fisk": ["ML", "APP"]} + if isinstance(dist, str): + if dist in dist_methods: + if method not in dist_methods[dist]: + raise NotImplementedError(f"{method} method is not implemented for {dist} distribution") + else: + raise NotImplementedError(f"{dist} distribution is not yet implemented.") + + zero_inflated = False + ssi = standardized_index( + q, + freq=freq, + window=window, + dist=dist, + method=method, + zero_inflated=zero_inflated, + fitkwargs=fitkwargs, + cal_start=cal_start, + cal_end=cal_end, + params=params, + **indexer, + ) + + return ssi + + @declare_units(snd="[length]") def snd_max(snd: xr.DataArray, freq: str = "YS-JUL") -> xr.DataArray: """ @@ -284,6 +419,138 @@ def melt_and_precip_max(snw: xr.DataArray, pr: xr.DataArray, window: int = 3, fr return out +@declare_units( + gwl="[length]", + params="[]", +) +def standardized_groundwater_index( + gwl: xr.DataArray, + freq: str | None = "MS", + window: int = 1, + dist: str | rv_continuous = "genextreme", + method: str = "ML", + fitkwargs: dict | None = None, + cal_start: DateStr | None = None, + cal_end: DateStr | None = None, + params: Quantified | None = None, + **indexer, +) -> xr.DataArray: + r""" + Standardized Groundwater Index (SGI). + + Parameters + ---------- + gwl : xarray.DataArray + Groundwater head level. + freq : str, optional + Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes that desired resampling + has already been applied input dataset and will skip the resampling step. + window : int + Averaging window length relative to the resampling frequency. For example, if `freq="MS"`, + i.e. a monthly resampling, the window is an integer number of months. + dist : {"gamma", "genextreme", "lognorm"} or `rv_continuous` + Name of the univariate distribution, or a callable `rv_continuous` (see :py:mod:`scipy.stats`). + method : {"APP", "ML", "PWM"} + Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). + The approximate method uses a deterministic function that does not involve any optimization. + `PWM` should be used with a `lmoments3` distribution. + fitkwargs : dict, optional + Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certains parameters (`floc`, `fscale`). + cal_start : DateStr, optional + Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period begins at the start of the input dataset. + cal_end : DateStr, optional + End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`. + Default option `None` means that the calibration period finishes at the end of the input dataset. + params : xarray.DataArray, optional + Fit parameters. + The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance. + The output can be given here as input, and it overrides other options. + **indexer : Indexer + Indexing parameters to compute the indicator on a temporal subset of the data. + It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`. + + Returns + ------- + xarray.DataArray, [unitless] + Standardized Groundwater Index. + + See Also + -------- + xclim.indices._agro.standardized_precipitation_index : Standardized Precipitation Index. + xclim.indices.stats.standardized_index : Standardized Index. + xclim.indices.stats.standardized_index_fit_params : Standardized Index Fit Params. + + Notes + ----- + * N-month SGI / N-day SGI is determined by choosing the `window = N` and the appropriate frequency `freq`. + * Supported statistical distributions are: ["gamma", "genextreme", "lognorm"]. + * If `params` is provided, it overrides the `cal_start`, `cal_end`, `freq`, `window`, `dist`, `method` options. + * "APP" method only supports two-parameter distributions. Parameter `loc` needs to be fixed to use method "APP". + + References + ---------- + :cite:cts:`bloomfield_2013` + + Examples + -------- + >>> from datetime import datetime + >>> from xclim.indices import standardized_groundwater_index + >>> ds = xr.open_dataset(path_to_gwl_file) + >>> gwl = ds.gwl + >>> cal_start, cal_end = "2006-05-01", "2008-06-01" + >>> sgi_3 = standardized_groundwater_index( + ... gwl, + ... freq="MS", + ... window=3, + ... dist="gamma", + ... method="ML", + ... cal_start=cal_start, + ... cal_end=cal_end, + ... ) # Computing SGI-3 months using a Gamma distribution for the fit + >>> # Fitting parameters can also be obtained first, then reused as input. + >>> from xclim.indices.stats import standardized_index_fit_params + >>> params = standardized_index_fit_params( + ... gwl.sel(time=slice(cal_start, cal_end)), + ... freq="MS", + ... window=3, + ... dist="gamma", + ... method="ML", + ... ) # First getting params + >>> sgi_3 = standardized_groundwater_index(gwl, params=params) + """ + fitkwargs = fitkwargs or {} + + dist_methods = { + "gamma": ["ML", "APP"], + "genextreme": ["ML", "APP"], + "lognorm": ["ML", "APP"], + } + if isinstance(dist, str): + if dist in dist_methods: + if method not in dist_methods[dist]: + raise NotImplementedError(f"{method} method is not implemented for {dist} distribution") + else: + raise NotImplementedError(f"{dist} distribution is not yet implemented.") + + zero_inflated = False + sgi = standardized_index( + gwl, + freq=freq, + window=window, + dist=dist, + method=method, + zero_inflated=zero_inflated, + fitkwargs=fitkwargs, + cal_start=cal_start, + cal_end=cal_end, + params=params, + **indexer, + ) + + return sgi + + @declare_units(q="[discharge]") def flow_index(q: xr.DataArray, p: float = 0.95) -> xr.DataArray: """ diff --git a/src/xclim/indices/stats.py b/src/xclim/indices/stats.py index d563fb7f3..85566f219 100644 --- a/src/xclim/indices/stats.py +++ b/src/xclim/indices/stats.py @@ -589,6 +589,24 @@ def _fit_start(x, dist: str, **fitkwargs: Any) -> tuple[tuple, dict]: c0 = np.pi * m / np.sqrt(3) / np.sqrt(m2 - m**2) kwargs = {"scale": scale0, "loc": loc0} return (c0,), kwargs + + if dist in ["lognorm"]: + if "floc" in fitkwargs: + loc0 = fitkwargs["floc"] + else: + # muralidhar_1992 + xs = sorted(x) + x1, xn, xp = xs[0], xs[-1], xs[int(len(x) / 2)] + loc0 = (x1 * xn - xp**2) / (x1 + xn - 2 * xp) + x_pos = x - loc0 + x_pos = x_pos[x_pos > 0] + # MLE estimation + log_x_pos = np.log(x_pos) + shape0 = log_x_pos.std() + scale0 = np.exp(log_x_pos.mean()) + kwargs = {"scale": scale0, "loc": loc0} + return (shape0,), kwargs + return (), {} @@ -813,11 +831,17 @@ def standardized_index_fit_params( if method == "APP": if "floc" not in fitkwargs.keys(): raise ValueError( - "The APP method is only supported for two-parameter distributions with `gamma` or `fisk` " - "with `loc` being fixed. Pass a value for `floc` in `fitkwargs`." + "The APP method is only supported for two-parameter distributions with `gamma`, `fisk`, " + "`lognorm`, or `genextreme` with `loc` being fixed. Pass a value for `floc` in `fitkwargs`." ) - dist_and_methods = {"gamma": ["ML", "APP"], "fisk": ["ML", "APP"]} + # "WPM" method doesn't seem to work for gamma or pearson3 + dist_and_methods = { + "gamma": ["ML", "APP"], + "fisk": ["ML", "APP"], + "genextreme": ["ML", "APP"], + "lognorm": ["ML", "APP"], + } dist = get_dist(dist) if method != "PWM": if dist.name not in dist_and_methods: diff --git a/tests/test_hydrology.py b/tests/test_hydrology.py index 0572d12e1..4bc7b4aa0 100644 --- a/tests/test_hydrology.py +++ b/tests/test_hydrology.py @@ -1,8 +1,13 @@ from __future__ import annotations +from pathlib import Path + import numpy as np +import pytest from xclim import indices as xci +from xclim import land +from xclim.core.units import convert_units_to class TestBaseFlowIndex: @@ -23,6 +28,37 @@ def test_simple(self, q_series): np.testing.assert_array_equal(out, 2) +class TestStandardizedStreamflow: + @pytest.mark.slow + def test_3d_data_with_nans(self, open_dataset): + nc_ds = Path("Raven", "q_sim.nc") + # test with data + ds = open_dataset(nc_ds) + q = ds.q_obs.sel(time=slice("2008")).rename("q") + qMM = convert_units_to(q, "mm**3/s", context="hydro") # noqa + # put a nan somewhere + qMM.values[10] = np.nan + q.values[10] = np.nan + + out1 = land.standardized_streamflow_index( + q, + freq="MS", + window=1, + dist="genextreme", + method="APP", + fitkwargs={"floc": 0}, + ) + out2 = land.standardized_streamflow_index( + qMM, + freq="MS", + window=1, + dist="genextreme", + method="APP", + fitkwargs={"floc": 0}, + ) + np.testing.assert_array_almost_equal(out1, out2, 3) + + class TestSnwMax: def test_simple(self, snw_series): a = np.zeros(366) diff --git a/tests/test_indices.py b/tests/test_indices.py index 98d61008b..00faaee64 100644 --- a/tests/test_indices.py +++ b/tests/test_indices.py @@ -28,7 +28,7 @@ from xclim.core import ValidationError from xclim.core.calendar import percentile_doy from xclim.core.options import set_options -from xclim.core.units import convert_units_to, units +from xclim.core.units import convert_units_to, rate2amount, units from xclim.indices import prsnd_to_prsn K2C = 273.15 @@ -841,6 +841,286 @@ def test_standardized_precipitation_evapotranspiration_index( np.testing.assert_allclose(spei.values, values, rtol=0, atol=diff_tol) + @pytest.mark.slow + @pytest.mark.parametrize( + "freq, window, dist, method, values, diff_tol", + [ + # reference results: Obtained with R package `standaRdized` + ( + "D", + 1, + "genextreme", + "ML", + [0.5331, 0.5338, 0.5098, 0.4656, 0.4937], + 9e-2, + ), + ( + "D", + 12, + "genextreme", + "ML", + [0.4414, 0.4695, 0.4861, 0.4838, 0.4877], + 9e-2, + ), + # reference results : xclim version where the test was implemented + ( + "D", + 1, + "genextreme", + "ML", + [0.6105, 0.6167, 0.5957, 0.5520, 0.5794], + 2e-2, + ), + ( + "D", + 1, + "genextreme", + "APP", + [-0.0259, -0.0141, -0.0080, -0.0098, 0.0089], + 2e-2, + ), + ("D", 1, "fisk", "ML", [0.3514, 0.3741, 0.1349, 0.4332, 0.1724], 2e-2), + ("D", 1, "fisk", "APP", [0.3321, 0.3477, 0.3536, 0.3468, 0.3723], 2e-2), + ( + "D", + 12, + "genextreme", + "ML", + [0.5131, 0.5442, 0.5645, 0.5660, 0.5720], + 2e-2, + ), + ( + "D", + 12, + "genextreme", + "APP", + [-0.0697, -0.0550, -0.0416, -0.0308, -0.0194], + 2e-2, + ), + ("D", 12, "fisk", "ML", [0.2096, 0.2728, 0.3259, 0.3466, 0.2836], 2e-2), + ("D", 12, "fisk", "APP", [0.2667, 0.2893, 0.3088, 0.3233, 0.3385], 2e-2), + ( + "MS", + 1, + "genextreme", + "ML", + [0.7315, -1.4919, -0.5405, 0.9965, -0.7449], + 2e-2, + ), + ( + "MS", + 1, + "genextreme", + "APP", + [0.0979, -1.6806, -0.5345, 0.7355, -0.7583], + 2e-2, + ), + ( + "MS", + 1, + "fisk", + "ML", + [0.533154, -1.5777, -0.436331, 0.29581, -0.814988], + 2e-2, + ), + ("MS", 1, "fisk", "APP", [0.4663, -1.9076, -0.5362, 0.8070, -0.8035], 2e-2), + ( + "MS", + 12, + "genextreme", + "ML", + [-0.9795, -1.0398, -1.9019, -1.6970, -1.4761], + 2e-2, + ), + ( + "MS", + 12, + "genextreme", + "APP", + [-0.9095, -1.0996, -1.9207, -2.2665, -2.1746], + 2e-2, + ), + ( + "MS", + 12, + "fisk", + "ML", + [-1.0776, -1.0827, -1.9333, -1.7764, -1.8391], + 2e-2, + ), + ( + "MS", + 12, + "fisk", + "APP", + [-0.9607, -1.1265, -1.7004, -1.8747, -1.8132], + 2e-2, + ), + ], + ) + def test_standardized_streamflow_index(self, open_dataset, freq, window, dist, method, values, diff_tol): + ds = open_dataset("Raven/q_sim.nc") + q = ds.q_obs.rename("q") + q_cal = ds.q_sim.rename("q").fillna(ds.q_sim.mean()) + if freq == "D": + q = q.sel(time=slice("2008-01-01", "2008-01-30")).fillna(ds.q_obs.mean()) + else: + q = q.sel(time=slice("2008-01-01", "2009-12-31")).fillna(ds.q_obs.mean()) + fitkwargs = {"floc": 0} if method == "APP" else {} + params = xci.stats.standardized_index_fit_params( + q_cal, + freq=freq, + window=window, + dist=dist, + method=method, + fitkwargs=fitkwargs, + zero_inflated=True, + ) + ssi = xci.standardized_streamflow_index(q, params=params) + ssi = ssi.isel(time=slice(-11, -1, 2)).values.flatten() + + np.testing.assert_allclose(ssi, values, rtol=0, atol=diff_tol) + + # TODO: Find another package to test against + # For now, we just take a snapshot of what xclim produces when this function + # was added + @pytest.mark.slow + @pytest.mark.parametrize( + "freq, window, dist, method, values, diff_tol", + [ + ( + "MS", + 12, + "gamma", + "APP", + [0.598212, 1.559759, 1.693086, 0.996405, 0.702797], + 2e-2, + ), + ( + "MS", + 12, + "gamma", + "ML", + [0.626364, 1.534493, 1.67347, 0.996994, 0.701663], + 0.04, + ), + ( + "D", + 12, + "gamma", + "APP", + [-0.244186, -0.114052, 0.649965, 1.0767, 0.64628], + 2e-2, + ), + ( + "D", + 12, + "gamma", + "ML", + [-0.158894, -0.048991, 0.677816, 0.960244, 0.6606], + 2e-2, + ), + ( + "MS", + 12, + "lognorm", + "APP", + [0.61497, 1.529579, 1.649581, 0.995286, 0.70964], + 2e-2, + ), + ( + "MS", + 12, + "lognorm", + "ML", + [0.631899, 1.532513, 1.691952, 0.997743, 0.68918], + 0.04, + ), + ( + "D", + 12, + "lognorm", + "APP", + [-0.151103, -0.015488, 0.700311, 1.097219, 0.695601], + 2e-2, + ), + ( + "D", + 12, + "lognorm", + "ML", + [-0.156812, -0.041269, 0.694897, 1.057204, 0.690219], + 2e-2, + ), + ( + "MS", + 12, + "genextreme", + "ML", + [0.626085, 1.549041, 1.740867, 0.982377, 0.658135], + 2e-2, + ), + ( + "D", + 12, + "genextreme", + "ML", + [-0.172489, -0.051075, 0.71535, 1.087408, 0.706588], + 2e-2, + ), + ( + "D", + 12, + "genextreme", + "APP", + [-0.27661, -0.145505, 0.707217, 1.178342, 0.711621], + 2e-2, + ), + ( + "W", + 1, + "gamma", + "ML", + [0.64676962, -0.06904886, -1.60493289, 1.07864037, -0.01415902], + 2e-2, + ), + ], + ) + def test_standardized_groundwater_index(self, open_dataset, freq, window, dist, method, values, diff_tol): + if method == "ML" and freq == "D" and Version(__numpy_version__) < Version("2.0.0"): + pytest.skip("Skipping SPI/ML/D on older numpy") + ds = open_dataset("sdba/CanESM2_1950-2100.nc").isel(location=1) + if freq == "D": + # to compare with ``climate_indices`` + ds = ds.convert_calendar("366_day", missing=np.nan) + elif freq == "W": + # only standard calendar supported with freq="W" + ds = ds.convert_calendar("standard", missing=np.nan, align_on="year", use_cftime=False) + gwl0 = rate2amount(convert_units_to(ds.pr, "mm/d")) + + gwl = gwl0.sel(time=slice("1998", "2000")) + + gwl_cal = gwl0.sel(time=slice("1950", "1980")) + fitkwargs = {} + if method == "APP": + fitkwargs["floc"] = 0 + params = xci.stats.standardized_index_fit_params( + gwl_cal, + freq=freq, + window=window, + dist=dist, + method=method, + fitkwargs=fitkwargs, + zero_inflated=True, + ) + sgi = xci.standardized_groundwater_index(gwl, params=params) + # Only a few moments before year 2000 are tested + sgi = sgi.isel(time=slice(-11, -1, 2)) + + sgi = sgi.clip(-3.09, 3.09) + + np.testing.assert_allclose(sgi.values, values, rtol=0, atol=diff_tol) + @pytest.mark.parametrize( "indexer", [