From d83925095490fa5b7ff689b0a138c34edc8df64c Mon Sep 17 00:00:00 2001 From: Gian Wiher Date: Tue, 1 Mar 2022 16:58:29 +0100 Subject: [PATCH 01/13] moved from forked branch --- .../forecasting/gradient_boosted_model.py | 199 +++++++++++++++++- .../forecasting/test_regression_models.py | 89 +++++++- 2 files changed, 286 insertions(+), 2 deletions(-) diff --git a/darts/models/forecasting/gradient_boosted_model.py b/darts/models/forecasting/gradient_boosted_model.py index 9742007c96..f2ee800665 100644 --- a/darts/models/forecasting/gradient_boosted_model.py +++ b/darts/models/forecasting/gradient_boosted_model.py @@ -8,13 +8,17 @@ https://github.com/unit8co/darts/blob/master/README.md """ +from collections import OrderedDict from typing import List, Optional, Sequence, Tuple, Union import lightgbm as lgb +import numpy as np +import xarray as xr from darts.logging import get_logger from darts.models.forecasting.regression_model import RegressionModel from darts.timeseries import TimeSeries +from darts.utils.utils import raise_if_not logger = get_logger(__name__) @@ -26,6 +30,8 @@ def __init__( lags_past_covariates: Union[int, List[int]] = None, lags_future_covariates: Union[Tuple[int, int], List[int]] = None, output_chunk_length: int = 1, + likelihood: str = None, + quantiles: List[float] = None, **kwargs, ): """Light Gradient Boosted Model @@ -48,10 +54,57 @@ def __init__( Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may be useful if the covariates don't extend far enough into the future. + likelihood + The objective used by the model. Currently, only `quantile` and 'poisson' are available. Allows sampling + from the model. + quantiles + If the `likelihood` is set to `quantile`, use these quantiles to samples from. **kwargs Additional keyword arguments passed to `lightgbm.LGBRegressor`. 
""" self.kwargs = kwargs + self._median_idx = None + self._model_container = _LightGBMModelContainer() + self.quantiles = quantiles + self.likelihood = likelihood + self._rng = None + + # parse likelihood + available_likelihoods = ["quantile", "poisson"] # to be extended + if likelihood is not None: + raise_if_not( + likelihood in available_likelihoods, + f"If likelihood is specified it must be one of {available_likelihoods}", + ) + self.kwargs["objective"] = likelihood + self._rng = np.random.default_rng(seed=420) + if likelihood == "quantile": + if quantiles is None: + self.quantiles = [ + 0.01, + 0.05, + 0.1, + 0.15, + 0.2, + 0.25, + 0.3, + 0.4, + 0.45, + 0.5, + 0.55, + 0.6, + 0.7, + 0.75, + 0.8, + 0.85, + 0.9, + 0.95, + 0.99, + ] + else: + self.quantiles = sorted(self.quantiles) + self._check_quantiles(self.quantiles) + self._median_idx = self.quantiles.index(0.5) super().__init__( lags=lags, @@ -102,7 +155,6 @@ def fit( """ if val_series is not None: - kwargs["eval_set"] = self._create_lagged_data( target_series=val_series, past_covariates=val_past_covariates, @@ -110,6 +162,23 @@ def fit( max_samples_per_ts=max_samples_per_ts, ) + if self.likelihood == "quantile": + # empty model container in case of multiple calls to fit, e.g. 
when backtesting + self._model_container.clear() + for quantile in self.quantiles: + self.kwargs["alpha"] = quantile + self.model = lgb.LGBMRegressor(**self.kwargs) + + super().fit( + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, + max_samples_per_ts=max_samples_per_ts, + **kwargs, + ) + + self._model_container[quantile] = self.model + super().fit( series=series, past_covariates=past_covariates, @@ -119,3 +188,131 @@ def fit( ) return self + + def predict( + self, + n: int, + series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, + past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, + future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, + num_samples: int = 1, + **kwargs, + ) -> Union[TimeSeries, Sequence[TimeSeries]]: + """Forecasts values for `n` time steps after the end of the series. + + Parameters + ---------- + n : int + Forecast horizon - the number of time steps after the end of the series for which to produce predictions. + series : TimeSeries or list of TimeSeries, optional + Optionally, one or several input `TimeSeries`, representing the history of the target series whose future + is to be predicted. If specified, the method returns the forecasts of these series. Otherwise, the method + returns the forecast of the (single) training series. + past_covariates : TimeSeries or list of TimeSeries, optional + Optionally, the past-observed covariates series needed as inputs for the model. + They must match the covariates used for training in terms of dimension and type. + future_covariates : TimeSeries or list of TimeSeries, optional + Optionally, the future-known covariates series needed as inputs for the model. + They must match the covariates used for training in terms of dimension and type. + num_samples : int, default: 1 + Specifies the numer of samples to obtain from the model. Should be set to 1 if no `likelihood` is specified. 
+ **kwargs : dict, optional + Additional keyword arguments passed to the `predict` method of the model. Only works with + univariate target series. + """ + + if self.likelihood == "quantile": + model_outputs = [] + for quantile, fitted in self._model_container.items(): + self.model = fitted + prediction = super().predict( + n, series, past_covariates, future_covariates, **kwargs + ) + model_outputs.append(prediction._xa.to_numpy()) + model_outputs = np.concatenate(model_outputs, axis=-1) + samples = self._sample_quantiles(model_outputs, num_samples) + # build timeseries from samples + new_xa = xr.DataArray( + samples, dims=prediction._xa.dims, coords=prediction._xa.coords + ) + return TimeSeries(new_xa) + + if self.likelihood == "poisson": + prediction = super().predict( + n, series, past_covariates, future_covariates, **kwargs + ) + samples = self._sample_poisson( + np.array(prediction._xa.to_numpy()), num_samples + ) + # build timeseries from samples + new_xa = xr.DataArray( + samples, dims=prediction._xa.dims, coords=prediction._xa.coords + ) + return TimeSeries(new_xa) + + return super().predict( + n, series, past_covariates, future_covariates, num_samples, **kwargs + ) + + def _sample_quantiles( + self, model_output: np.ndarray, num_samples: int + ) -> np.ndarray: + """ + This method is ported to numpy from the probabilistic torch models module + model_output is of shape (n_timesteps, n_components, n_quantiles) + """ + raise_if_not(all([isinstance(num_samples, int), num_samples > 0])) + quantiles = np.tile(np.array(self.quantiles), (num_samples, 1)) + probas = np.tile( + self._rng.uniform(size=(num_samples,)), (len(self.quantiles), 1) + ) + + quantile_idxs = np.sum(probas.T > quantiles, axis=1) + + # To make the sampling symmetric around the median, we assign the two "probability buckets" before and after + # the median to the median value. 
If we don't do that, the highest quantile would be wrongly sampled + # too often as it would capture the "probability buckets" preceding and following it. + # + # Example; the arrows shows how the buckets map to values: [--> 0.1 --> 0.25 --> 0.5 <-- 0.75 <-- 0.9 <--] + quantile_idxs = np.where( + quantile_idxs <= self._median_idx, quantile_idxs, quantile_idxs - 1 + ) + + return model_output[:, :, quantile_idxs] + + def _sample_poisson(self, model_output: np.ndarray, num_samples: int) -> np.ndarray: + raise_if_not(all([isinstance(num_samples, int), num_samples > 0])) + return self._rng.poisson( + lam=model_output, size=(*model_output.shape[:2], num_samples) + ).astype(float) + + @staticmethod + def _check_quantiles(quantiles): + raise_if_not( + all([0 < q < 1 for q in quantiles]), + "All provided quantiles must be between 0 and 1.", + ) + + # we require the median to be present and the quantiles to be symmetric around it, + # for correctness of sampling. + median_q = 0.5 + raise_if_not( + median_q in quantiles, "median quantile `q=0.5` must be in `quantiles`" + ) + is_centered = [ + -1e-6 < (median_q - left_q) + (median_q - right_q) < 1e-6 + for left_q, right_q in zip(quantiles, quantiles[::-1]) + ] + raise_if_not( + all(is_centered), + "quantiles lower than `q=0.5` need to share same difference to `0.5` as quantiles " + "higher than `q=0.5`", + ) + + +class _LightGBMModelContainer(OrderedDict): + def __init__(self): + super().__init__() + + def __str__(self): + return f"_LightGBMModelContainer(quantiles={list(self.keys())})" diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 56d694ccd8..640996965f 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -10,7 +10,8 @@ import darts from darts import TimeSeries from darts.logging import get_logger -from darts.metrics import rmse +from darts.metrics import 
mae, rmse +from darts.models.forecasting.forecasting_model import GlobalForecastingModel from darts.tests.base_test_class import DartsBaseTestClass from darts.utils import timeseries_generation as tg @@ -889,3 +890,89 @@ def test_gradient_boosted_model_with_eval_set(self, lgb_fit_patch): assert lgb_fit_patch.call_args[1]["eval_set"] is not None assert lgb_fit_patch.call_args[1]["early_stopping_rounds"] == 2 + + class ProbabilisticRegressionModelsTestCase(DartsBaseTestClass): + models_cls_kwargs_errs = [ + (LightGBMModel, {"lags": 2, "likelihood": "quantile"}, 0.4), + ( + LightGBMModel, + { + "lags": 2, + "likelihood": "quantile", + "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9], + }, + 0.4, + ), + (LightGBMModel, {"lags": 2, "likelihood": "poisson"}, 0.6), + ] + + np.random.seed(420) + + constant_ts = tg.constant_timeseries(length=200, value=0.5) + constant_noisy_ts = constant_ts + tg.gaussian_timeseries(length=200, std=0.1) + constant_multivar_ts = constant_ts.stack(constant_ts) + constant_noisy_multivar_ts = constant_noisy_ts.stack(constant_noisy_ts) + num_samples = 5 + + def test_fit_predict_determinism(self): + + for model_cls, model_kwargs, _ in self.models_cls_kwargs_errs: + # whether the first predictions of two models initiated with the same random state are the same + model = model_cls(**model_kwargs) + model.fit(self.constant_noisy_multivar_ts) + pred1 = model.predict(n=10, num_samples=2).values() + + model = model_cls(**model_kwargs) + model.fit(self.constant_noisy_multivar_ts) + pred2 = model.predict(n=10, num_samples=2).values() + + self.assertTrue((pred1 == pred2).all()) + + # test whether the next prediction of the same model is different + pred3 = model.predict(n=10, num_samples=2).values() + self.assertTrue((pred2 != pred3).any()) + + def test_probabilistic_forecast_accuracy(self): + for model_cls, model_kwargs, err in self.models_cls_kwargs_errs: + self.helper_test_probabilistic_forecast_accuracy( + model_cls, + model_kwargs, + err, + self.constant_ts, + 
self.constant_noisy_ts, + ) + if issubclass(model_cls, GlobalForecastingModel): + self.helper_test_probabilistic_forecast_accuracy( + model_cls, + model_kwargs, + err, + self.constant_multivar_ts, + self.constant_noisy_multivar_ts, + ) + + def helper_test_probabilistic_forecast_accuracy( + self, model_cls, model_kwargs, err, ts, noisy_ts + ): + model = model_cls(**model_kwargs) + model.fit(noisy_ts[:100]) + pred = model.predict(n=100, num_samples=100) + + # test accuracy of the median prediction compared to the noiseless ts + mae_err_median = mae(ts[100:], pred) + self.assertLess(mae_err_median, err) + + # test accuracy for increasing quantiles between 0.7 and 1 (it should ~decrease, mae should ~increase) + tested_quantiles = [0.7, 0.8, 0.9, 0.99] + mae_err = mae_err_median + for quantile in tested_quantiles: + new_mae = mae(ts[100:], pred.quantile_timeseries(quantile=quantile)) + self.assertLess(mae_err, new_mae + 0.1) + mae_err = new_mae + + # test accuracy for decreasing quantiles between 0.3 and 0 (it should ~decrease, mae should ~increase) + tested_quantiles = [0.3, 0.2, 0.1, 0.01] + mae_err = mae_err_median + for quantile in tested_quantiles: + new_mae = mae(ts[100:], pred.quantile_timeseries(quantile=quantile)) + self.assertLess(mae_err, new_mae + 0.1) + mae_err = new_mae From 60311e38fca972b2176136d5272ffdb65ffd4ca8 Mon Sep 17 00:00:00 2001 From: Gian Wiher Date: Mon, 7 Mar 2022 14:11:39 +0100 Subject: [PATCH 02/13] added likelihoods to linear model, updated tests, refactored lgbm quantile regression --- .../forecasting/gradient_boosted_model.py | 120 ++--------- .../forecasting/linear_regression_model.py | 192 +++++++++++++++++- darts/models/forecasting/regression_model.py | 118 +++++++++++ .../forecasting/test_regression_models.py | 8 +- 4 files changed, 323 insertions(+), 115 deletions(-) diff --git a/darts/models/forecasting/gradient_boosted_model.py b/darts/models/forecasting/gradient_boosted_model.py index f2ee800665..ef5d27d700 100644 --- 
a/darts/models/forecasting/gradient_boosted_model.py +++ b/darts/models/forecasting/gradient_boosted_model.py @@ -8,22 +8,19 @@ https://github.com/unit8co/darts/blob/master/README.md """ -from collections import OrderedDict from typing import List, Optional, Sequence, Tuple, Union import lightgbm as lgb import numpy as np -import xarray as xr from darts.logging import get_logger -from darts.models.forecasting.regression_model import RegressionModel +from darts.models.forecasting.regression_model import RegressionModel, _LikelihoodMixin from darts.timeseries import TimeSeries -from darts.utils.utils import raise_if_not logger = get_logger(__name__) -class LightGBMModel(RegressionModel): +class LightGBMModel(RegressionModel, _LikelihoodMixin): def __init__( self, lags: Union[int, list] = None, @@ -64,47 +61,21 @@ def __init__( """ self.kwargs = kwargs self._median_idx = None - self._model_container = _LightGBMModelContainer() - self.quantiles = quantiles + self._model_container = None + self.quantiles = None self.likelihood = likelihood self._rng = None # parse likelihood available_likelihoods = ["quantile", "poisson"] # to be extended if likelihood is not None: - raise_if_not( - likelihood in available_likelihoods, - f"If likelihood is specified it must be one of {available_likelihoods}", - ) + self._check_likelihood(likelihood, available_likelihoods) self.kwargs["objective"] = likelihood self._rng = np.random.default_rng(seed=420) + if likelihood == "quantile": - if quantiles is None: - self.quantiles = [ - 0.01, - 0.05, - 0.1, - 0.15, - 0.2, - 0.25, - 0.3, - 0.4, - 0.45, - 0.5, - 0.55, - 0.6, - 0.7, - 0.75, - 0.8, - 0.85, - 0.9, - 0.95, - 0.99, - ] - else: - self.quantiles = sorted(self.quantiles) - self._check_quantiles(self.quantiles) - self._median_idx = self.quantiles.index(0.5) + self.quantiles, self._median_idx = self._prepare_quantiles(quantiles) + self._model_container = self._get_model_container() super().__init__( lags=lags, @@ -179,6 +150,8 @@ def fit( 
self._model_container[quantile] = self.model + return self + super().fit( series=series, past_covariates=past_covariates, @@ -232,10 +205,7 @@ def predict( model_outputs = np.concatenate(model_outputs, axis=-1) samples = self._sample_quantiles(model_outputs, num_samples) # build timeseries from samples - new_xa = xr.DataArray( - samples, dims=prediction._xa.dims, coords=prediction._xa.coords - ) - return TimeSeries(new_xa) + return self._ts_like(prediction, samples) if self.likelihood == "poisson": prediction = super().predict( @@ -245,74 +215,8 @@ def predict( np.array(prediction._xa.to_numpy()), num_samples ) # build timeseries from samples - new_xa = xr.DataArray( - samples, dims=prediction._xa.dims, coords=prediction._xa.coords - ) - return TimeSeries(new_xa) + return self._ts_like(prediction, samples) return super().predict( n, series, past_covariates, future_covariates, num_samples, **kwargs ) - - def _sample_quantiles( - self, model_output: np.ndarray, num_samples: int - ) -> np.ndarray: - """ - This method is ported to numpy from the probabilistic torch models module - model_output is of shape (n_timesteps, n_components, n_quantiles) - """ - raise_if_not(all([isinstance(num_samples, int), num_samples > 0])) - quantiles = np.tile(np.array(self.quantiles), (num_samples, 1)) - probas = np.tile( - self._rng.uniform(size=(num_samples,)), (len(self.quantiles), 1) - ) - - quantile_idxs = np.sum(probas.T > quantiles, axis=1) - - # To make the sampling symmetric around the median, we assign the two "probability buckets" before and after - # the median to the median value. If we don't do that, the highest quantile would be wrongly sampled - # too often as it would capture the "probability buckets" preceding and following it. 
- # - # Example; the arrows shows how the buckets map to values: [--> 0.1 --> 0.25 --> 0.5 <-- 0.75 <-- 0.9 <--] - quantile_idxs = np.where( - quantile_idxs <= self._median_idx, quantile_idxs, quantile_idxs - 1 - ) - - return model_output[:, :, quantile_idxs] - - def _sample_poisson(self, model_output: np.ndarray, num_samples: int) -> np.ndarray: - raise_if_not(all([isinstance(num_samples, int), num_samples > 0])) - return self._rng.poisson( - lam=model_output, size=(*model_output.shape[:2], num_samples) - ).astype(float) - - @staticmethod - def _check_quantiles(quantiles): - raise_if_not( - all([0 < q < 1 for q in quantiles]), - "All provided quantiles must be between 0 and 1.", - ) - - # we require the median to be present and the quantiles to be symmetric around it, - # for correctness of sampling. - median_q = 0.5 - raise_if_not( - median_q in quantiles, "median quantile `q=0.5` must be in `quantiles`" - ) - is_centered = [ - -1e-6 < (median_q - left_q) + (median_q - right_q) < 1e-6 - for left_q, right_q in zip(quantiles, quantiles[::-1]) - ] - raise_if_not( - all(is_centered), - "quantiles lower than `q=0.5` need to share same difference to `0.5` as quantiles " - "higher than `q=0.5`", - ) - - -class _LightGBMModelContainer(OrderedDict): - def __init__(self): - super().__init__() - - def __str__(self): - return f"_LightGBMModelContainer(quantiles={list(self.keys())})" diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py index 8d81858a42..2f6efce6f8 100644 --- a/darts/models/forecasting/linear_regression_model.py +++ b/darts/models/forecasting/linear_regression_model.py @@ -5,23 +5,27 @@ A forecasting model using a linear regression of some of the target series' lags, as well as optionally some covariate series' lags in order to obtain a forecast. 
""" -from typing import List, Tuple, Union +from typing import List, Optional, Sequence, Tuple, Union -from sklearn.linear_model import LinearRegression +import numpy as np +from sklearn.linear_model import LinearRegression, PoissonRegressor, QuantileRegressor from darts.logging import get_logger -from darts.models.forecasting.regression_model import RegressionModel +from darts.models.forecasting.regression_model import RegressionModel, _LikelihoodMixin +from darts.timeseries import TimeSeries logger = get_logger(__name__) -class LinearRegressionModel(RegressionModel): +class LinearRegressionModel(RegressionModel, _LikelihoodMixin): def __init__( self, lags: Union[int, list] = None, lags_past_covariates: Union[int, List[int]] = None, lags_future_covariates: Union[Tuple[int, int], List[int]] = None, output_chunk_length: int = 1, + likelihood: str = None, + quantiles: List[float] = None, **kwargs, ): """Linear regression model. @@ -44,17 +48,195 @@ def __init__( Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may be useful if the covariates don't extend far enough into the future. + likelihood + The objective used by the model. Currently, only `quantile` and 'poisson' are available. Allows sampling + from the model. + quantiles + If the `likelihood` is set to `quantile`, use these quantiles to samples from. **kwargs Additional keyword arguments passed to `sklearn.linear_model.LinearRegression`. 
""" self.kwargs = kwargs + self._median_idx = None + self._model_container = None + self.quantiles = None + self.likelihood = likelihood + self._rng = None + + # parse likelihood + available_likelihoods = ["quantile", "poisson"] # to be extended + if likelihood is not None: + self._check_likelihood(likelihood, available_likelihoods) + self._rng = np.random.default_rng(seed=420) + + if likelihood == "poisson": + model = PoissonRegressor(**kwargs) + if likelihood == "quantile": + model = QuantileRegressor(**kwargs) + self.quantiles, self._median_idx = self._prepare_quantiles(quantiles) + self._model_container = self._get_model_container() + else: + model = LinearRegression(**kwargs) + super().__init__( lags=lags, lags_past_covariates=lags_past_covariates, lags_future_covariates=lags_future_covariates, output_chunk_length=output_chunk_length, - model=LinearRegression(**kwargs), + model=model, ) def __str__(self): return f"LinearRegression(lags={self.lags})" + + def fit( + self, + series: Union[TimeSeries, Sequence[TimeSeries]], + past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, + future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, + max_samples_per_ts: Optional[int] = None, + n_jobs_multioutput_wrapper: Optional[int] = None, + **kwargs, + ): + """ + Fit/train the model on one or multiple series. + + Parameters + ---------- + series + TimeSeries or Sequence[TimeSeries] object containing the target values. + past_covariates + Optionally, a series or sequence of series specifying past-observed covariates + future_covariates + Optionally, a series or sequence of series specifying future-known covariates + max_samples_per_ts + This is an integer upper bound on the number of tuples that can be produced + per time series. It can be used in order to have an upper bound on the total size of the dataset and + ensure proper sampling. 
If `None`, it will read all of the individual time series in advance (at dataset + creation) to know their sizes, which might be expensive on big datasets. + If some series turn out to have a length that would allow more than `max_samples_per_ts`, only the + most recent `max_samples_per_ts` samples will be considered. + n_jobs_multioutput_wrapper + Number of jobs of the MultiOutputRegressor wrapper to run in parallel. Only used if the model doesn't + support multi-output regression natively. + **kwargs + Additional keyword arguments passed to the `fit` method of the model. + """ + + if self.likelihood == "quantile": + # empty model container in case of multiple calls to fit, e.g. when backtesting + self._model_container.clear() + for i, quantile in enumerate(self.quantiles): + self.kwargs["quantile"] = quantile + if i == 0: + # check solver + if "solver" not in self.kwargs: + # set default fast solver + self.kwargs["solver"] = "highs" + try: + self.model = QuantileRegressor(**self.kwargs) + super().fit( + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, + max_samples_per_ts=max_samples_per_ts, + **kwargs, + ) + except ValueError: + logger.warning( + f"Solver {self.kwargs.get('solver')} is not available. Upgrade scipy" + f" to access faster solvers." 
+ ) + # set to slow (legacy) solver + self.kwargs["solver"] = "interior-point" + self.model = QuantileRegressor(**self.kwargs) + super().fit( + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, + max_samples_per_ts=max_samples_per_ts, + **kwargs, + ) + self.model = QuantileRegressor(**self.kwargs) + super().fit( + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, + max_samples_per_ts=max_samples_per_ts, + **kwargs, + ) + + self._model_container[quantile] = self.model + + return self + + super().fit( + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, + max_samples_per_ts=max_samples_per_ts, + **kwargs, + ) + + return self + + def predict( + self, + n: int, + series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, + past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, + future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, + num_samples: int = 1, + **kwargs, + ) -> Union[TimeSeries, Sequence[TimeSeries]]: + """Forecasts values for `n` time steps after the end of the series. + + Parameters + ---------- + n : int + Forecast horizon - the number of time steps after the end of the series for which to produce predictions. + series : TimeSeries or list of TimeSeries, optional + Optionally, one or several input `TimeSeries`, representing the history of the target series whose future + is to be predicted. If specified, the method returns the forecasts of these series. Otherwise, the method + returns the forecast of the (single) training series. + past_covariates : TimeSeries or list of TimeSeries, optional + Optionally, the past-observed covariates series needed as inputs for the model. + They must match the covariates used for training in terms of dimension and type. 
+ future_covariates : TimeSeries or list of TimeSeries, optional + Optionally, the future-known covariates series needed as inputs for the model. + They must match the covariates used for training in terms of dimension and type. + num_samples : int, default: 1 + Currently this parameter is ignored for regression models. + **kwargs : dict, optional + Additional keyword arguments passed to the `predict` method of the model. Only works with + univariate target series. + """ + + if self.likelihood == "quantile": + model_outputs = [] + for quantile, fitted in self._model_container.items(): + self.model = fitted + prediction = super().predict( + n, series, past_covariates, future_covariates, **kwargs + ) + model_outputs.append(prediction._xa.to_numpy()) + model_outputs = np.concatenate(model_outputs, axis=-1) + samples = self._sample_quantiles(model_outputs, num_samples) + + # build timeseries from samples + return self._ts_like(prediction, samples) + + if self.likelihood == "poisson": + prediction = super().predict( + n, series, past_covariates, future_covariates, **kwargs + ) + samples = self._sample_poisson( + np.array(prediction._xa.to_numpy()), num_samples + ) + + # build timeseries from samples + return self._ts_like(prediction, samples) + + return super().predict( + n, series, past_covariates, future_covariates, num_samples, **kwargs + ) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index cb4f378716..342b50d9cf 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -24,10 +24,12 @@ """ import math +from collections import OrderedDict from typing import List, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd +import xarray as xr from sklearn.linear_model import LinearRegression from sklearn.multioutput import MultiOutputRegressor @@ -594,3 +596,119 @@ def predict( def __str__(self): return self.model.__str__() + + +class 
_LikelihoodMixin: + """ + A class containing functions supporting quantile and poisson regression, to be used as a mixin in + [`PreTrainedModel`]. + """ + + @staticmethod + def _check_likelihood(likelihood, available_likelihoods): + raise_if_not( + likelihood in available_likelihoods, + f"If likelihood is specified it must be one of {available_likelihoods}", + ) + + @staticmethod + def _get_model_container(): + return _QuantileModelContainer() + + def _prepare_quantiles(self, quantiles): + if quantiles is None: + quantiles = [ + 0.01, + 0.05, + 0.1, + 0.15, + 0.2, + 0.25, + 0.3, + 0.4, + 0.45, + 0.5, + 0.55, + 0.6, + 0.7, + 0.75, + 0.8, + 0.85, + 0.9, + 0.95, + 0.99, + ] + else: + quantiles = sorted(quantiles) + self._check_quantiles(quantiles) + median_idx = quantiles.index(0.5) + + return quantiles, median_idx + + def _sample_quantiles( + self, model_output: np.ndarray, num_samples: int + ) -> np.ndarray: + """ + This method is ported to numpy from the probabilistic torch models module + model_output is of shape (n_timesteps, n_components, n_quantiles) + """ + raise_if_not(all([isinstance(num_samples, int), num_samples > 0])) + quantiles = np.tile(np.array(self.quantiles), (num_samples, 1)) + probas = np.tile( + self._rng.uniform(size=(num_samples,)), (len(self.quantiles), 1) + ) + + quantile_idxs = np.sum(probas.T > quantiles, axis=1) + + # To make the sampling symmetric around the median, we assign the two "probability buckets" before and after + # the median to the median value. If we don't do that, the highest quantile would be wrongly sampled + # too often as it would capture the "probability buckets" preceding and following it. 
+ # + # Example; the arrows shows how the buckets map to values: [--> 0.1 --> 0.25 --> 0.5 <-- 0.75 <-- 0.9 <--] + quantile_idxs = np.where( + quantile_idxs <= self._median_idx, quantile_idxs, quantile_idxs - 1 + ) + + return model_output[:, :, quantile_idxs] + + def _sample_poisson(self, model_output: np.ndarray, num_samples: int) -> np.ndarray: + raise_if_not(all([isinstance(num_samples, int), num_samples > 0])) + return self._rng.poisson( + lam=model_output, size=(*model_output.shape[:2], num_samples) + ).astype(float) + + @staticmethod + def _ts_like(other: TimeSeries, data: np.ndarray) -> TimeSeries: + new_xa = xr.DataArray(data, dims=other._xa.dims, coords=other._xa.coords) + return TimeSeries(new_xa) + + @staticmethod + def _check_quantiles(quantiles): + raise_if_not( + all([0 < q < 1 for q in quantiles]), + "All provided quantiles must be between 0 and 1.", + ) + + # we require the median to be present and the quantiles to be symmetric around it, + # for correctness of sampling. 
+ median_q = 0.5 + raise_if_not( + median_q in quantiles, "median quantile `q=0.5` must be in `quantiles`" + ) + is_centered = [ + -1e-6 < (median_q - left_q) + (median_q - right_q) < 1e-6 + for left_q, right_q in zip(quantiles, quantiles[::-1]) + ] + raise_if_not( + all(is_centered), + "quantiles lower than `q=0.5` need to share same difference to `0.5` as quantiles " + "higher than `q=0.5`", + ) + + +class _QuantileModelContainer(OrderedDict): + def __init__(self): + super().__init__() + + def __str__(self): + return f"_QuantileModelContainer(quantiles={list(self.keys())})" diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 640996965f..31933275ee 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -904,10 +904,14 @@ class ProbabilisticRegressionModelsTestCase(DartsBaseTestClass): 0.4, ), (LightGBMModel, {"lags": 2, "likelihood": "poisson"}, 0.6), + ( + LinearRegressionModel, + {"lags": 2, "likelihood": "quantile", "solver": "highs"}, + 0.6, + ), + (LinearRegressionModel, {"lags": 2, "likelihood": "poisson"}, 0.6), ] - np.random.seed(420) - constant_ts = tg.constant_timeseries(length=200, value=0.5) constant_noisy_ts = constant_ts + tg.gaussian_timeseries(length=200, std=0.1) constant_multivar_ts = constant_ts.stack(constant_ts) From fd96c09ff7dd5b46fc453ab350485408a7cf24f3 Mon Sep 17 00:00:00 2001 From: Gian Wiher Date: Mon, 7 Mar 2022 14:28:24 +0100 Subject: [PATCH 03/13] fixed docstring --- darts/models/forecasting/regression_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 342b50d9cf..a0e1f54394 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -600,8 +600,8 @@ def __str__(self): class _LikelihoodMixin: """ - 
A class containing functions supporting quantile and poisson regression, to be used as a mixin in - [`PreTrainedModel`]. + A class containing functions supporting quantile and poisson regression, to be used as a mixin for some + `RegressionModel` subclasses. """ @staticmethod From 735f89bd56e2f2a080051d9e651881adcbd14c68 Mon Sep 17 00:00:00 2001 From: Gian Wiher Date: Mon, 7 Mar 2022 14:30:33 +0100 Subject: [PATCH 04/13] fixed test case --- darts/tests/models/forecasting/test_regression_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 31933275ee..41bc3663ec 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -906,7 +906,7 @@ class ProbabilisticRegressionModelsTestCase(DartsBaseTestClass): (LightGBMModel, {"lags": 2, "likelihood": "poisson"}, 0.6), ( LinearRegressionModel, - {"lags": 2, "likelihood": "quantile", "solver": "highs"}, + {"lags": 2, "likelihood": "quantile"}, 0.6, ), (LinearRegressionModel, {"lags": 2, "likelihood": "poisson"}, 0.6), From 2825f089d58811eb31c4e917562511ede6a86ba4 Mon Sep 17 00:00:00 2001 From: gian <98726300+gnwhr@users.noreply.github.com> Date: Thu, 10 Mar 2022 08:36:31 +0100 Subject: [PATCH 05/13] Update darts/models/forecasting/gradient_boosted_model.py Co-authored-by: Julien Herzen --- darts/models/forecasting/gradient_boosted_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/gradient_boosted_model.py b/darts/models/forecasting/gradient_boosted_model.py index ef5d27d700..47664faebb 100644 --- a/darts/models/forecasting/gradient_boosted_model.py +++ b/darts/models/forecasting/gradient_boosted_model.py @@ -52,7 +52,7 @@ def __init__( horizon `n` used in `predict()`. 
However, setting `output_chunk_length` equal to the forecast horizon may be useful if the covariates don't extend far enough into the future. likelihood - The objective used by the model. Currently, only `quantile` and 'poisson' are available. Allows sampling + Can be set to `quantile` or 'poisson'. If set, the model will be probabilistic, allowing sampling at prediction time. from the model. quantiles If the `likelihood` is set to `quantile`, use these quantiles to samples from. From b5f3015433b1907db76b7ab1eab84fba62a982f7 Mon Sep 17 00:00:00 2001 From: gian <98726300+gnwhr@users.noreply.github.com> Date: Thu, 10 Mar 2022 08:36:41 +0100 Subject: [PATCH 06/13] Update darts/models/forecasting/gradient_boosted_model.py Co-authored-by: Julien Herzen --- darts/models/forecasting/gradient_boosted_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/gradient_boosted_model.py b/darts/models/forecasting/gradient_boosted_model.py index 47664faebb..6fe1a9ad0e 100644 --- a/darts/models/forecasting/gradient_boosted_model.py +++ b/darts/models/forecasting/gradient_boosted_model.py @@ -55,7 +55,7 @@ def __init__( Can be set to `quantile` or 'poisson'. If set, the model will be probabilistic, allowing sampling at prediction time. from the model. quantiles - If the `likelihood` is set to `quantile`, use these quantiles to samples from. + Fit the model to these quantiles if the `likelihood` is set to `quantile`. **kwargs Additional keyword arguments passed to `lightgbm.LGBRegressor`. 
""" From a9726322295880c1f0bdda1442f0b9c0344ae335 Mon Sep 17 00:00:00 2001 From: gian <98726300+gnwhr@users.noreply.github.com> Date: Thu, 10 Mar 2022 08:37:40 +0100 Subject: [PATCH 07/13] Update darts/models/forecasting/gradient_boosted_model.py Co-authored-by: Julien Herzen --- darts/models/forecasting/gradient_boosted_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/gradient_boosted_model.py b/darts/models/forecasting/gradient_boosted_model.py index 6fe1a9ad0e..28ecf0ec40 100644 --- a/darts/models/forecasting/gradient_boosted_model.py +++ b/darts/models/forecasting/gradient_boosted_model.py @@ -201,7 +201,7 @@ def predict( prediction = super().predict( n, series, past_covariates, future_covariates, **kwargs ) - model_outputs.append(prediction._xa.to_numpy()) + model_outputs.append(prediction.all_values(copy=False)) model_outputs = np.concatenate(model_outputs, axis=-1) samples = self._sample_quantiles(model_outputs, num_samples) # build timeseries from samples From 6baacd423209028263e5a794bb2cd2debef3d825 Mon Sep 17 00:00:00 2001 From: gian <98726300+gnwhr@users.noreply.github.com> Date: Thu, 10 Mar 2022 08:37:48 +0100 Subject: [PATCH 08/13] Update darts/models/forecasting/gradient_boosted_model.py Co-authored-by: Julien Herzen --- darts/models/forecasting/gradient_boosted_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/gradient_boosted_model.py b/darts/models/forecasting/gradient_boosted_model.py index 28ecf0ec40..d5f4b8ce74 100644 --- a/darts/models/forecasting/gradient_boosted_model.py +++ b/darts/models/forecasting/gradient_boosted_model.py @@ -212,7 +212,7 @@ def predict( n, series, past_covariates, future_covariates, **kwargs ) samples = self._sample_poisson( - np.array(prediction._xa.to_numpy()), num_samples + np.array(prediction.all_values(copy=False)), num_samples ) # build timeseries from samples return self._ts_like(prediction, samples) From 
5d68304914e3f32153a3600230d4195b786f4d61 Mon Sep 17 00:00:00 2001 From: gian <98726300+gnwhr@users.noreply.github.com> Date: Thu, 10 Mar 2022 08:39:28 +0100 Subject: [PATCH 09/13] Update darts/models/forecasting/linear_regression_model.py Co-authored-by: Julien Herzen --- darts/models/forecasting/linear_regression_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py index 2f6efce6f8..6bdd5f3e1d 100644 --- a/darts/models/forecasting/linear_regression_model.py +++ b/darts/models/forecasting/linear_regression_model.py @@ -219,7 +219,7 @@ def predict( prediction = super().predict( n, series, past_covariates, future_covariates, **kwargs ) - model_outputs.append(prediction._xa.to_numpy()) + model_outputs.append(prediction.all_values(copy=False)) model_outputs = np.concatenate(model_outputs, axis=-1) samples = self._sample_quantiles(model_outputs, num_samples) From ddc5c8e5fc501c4fc1df7791b84548ce34d71390 Mon Sep 17 00:00:00 2001 From: gian <98726300+gnwhr@users.noreply.github.com> Date: Thu, 10 Mar 2022 09:00:41 +0100 Subject: [PATCH 10/13] Apply suggestions from code review Co-authored-by: Julien Herzen --- darts/models/forecasting/linear_regression_model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py index 6bdd5f3e1d..ec380ee22b 100644 --- a/darts/models/forecasting/linear_regression_model.py +++ b/darts/models/forecasting/linear_regression_model.py @@ -54,7 +54,7 @@ def __init__( quantiles If the `likelihood` is set to `quantile`, use these quantiles to samples from. **kwargs - Additional keyword arguments passed to `sklearn.linear_model.LinearRegression`. 
+ Additional keyword arguments passed to `sklearn.linear_model.LinearRegression` (by default), to `sklearn.linear_model.PoissonRegressor` (if `likelihood="poisson"`), or to `sklearn.linear_model.QuantileRegressor` (if `likelihood="quantile"`). """ self.kwargs = kwargs self._median_idx = None @@ -226,12 +226,12 @@ def predict( # build timeseries from samples return self._ts_like(prediction, samples) - if self.likelihood == "poisson": + elif self.likelihood == "poisson": prediction = super().predict( n, series, past_covariates, future_covariates, **kwargs ) samples = self._sample_poisson( - np.array(prediction._xa.to_numpy()), num_samples + np.array(prediction.all_values(copy=False)), num_samples ) # build timeseries from samples From 4438fe7f9347914ac8fe7176a3bd3bacc2ecf279 Mon Sep 17 00:00:00 2001 From: Gian Wiher Date: Thu, 10 Mar 2022 11:41:17 +0100 Subject: [PATCH 11/13] applied suggestions from code review, adjustet test and moved _check_quantiles to utils/utils.py --- .../forecasting/gradient_boosted_model.py | 11 ++- .../forecasting/linear_regression_model.py | 75 +++++++++---------- darts/models/forecasting/regression_model.py | 39 +--------- .../forecasting/test_regression_models.py | 25 +++++-- darts/utils/likelihood_models.py | 27 +------ darts/utils/utils.py | 23 ++++++ 6 files changed, 91 insertions(+), 109 deletions(-) diff --git a/darts/models/forecasting/gradient_boosted_model.py b/darts/models/forecasting/gradient_boosted_model.py index d5f4b8ce74..3881c2e549 100644 --- a/darts/models/forecasting/gradient_boosted_model.py +++ b/darts/models/forecasting/gradient_boosted_model.py @@ -29,6 +29,7 @@ def __init__( output_chunk_length: int = 1, likelihood: str = None, quantiles: List[float] = None, + random_state: Optional[int] = None, **kwargs, ): """Light Gradient Boosted Model @@ -52,13 +53,17 @@ def __init__( horizon `n` used in `predict()`. 
However, setting `output_chunk_length` equal to the forecast horizon may be useful if the covariates don't extend far enough into the future. likelihood - Can be set to `quantile` or 'poisson'. If set, the model will be probabilistic, allowing sampling at prediction time. - from the model. + Can be set to `quantile` or 'poisson'. If set, the model will be probabilistic, allowing sampling at + prediction time. quantiles Fit the model to these quantiles if the `likelihood` is set to `quantile`. + random_state + Control the randomness in the fitting procedure and for sampling. + Default: ``None``. **kwargs Additional keyword arguments passed to `lightgbm.LGBRegressor`. """ + kwargs["random_state"] = random_state # seed for tree learner self.kwargs = kwargs self._median_idx = None self._model_container = None @@ -71,7 +76,7 @@ def __init__( if likelihood is not None: self._check_likelihood(likelihood, available_likelihoods) self.kwargs["objective"] = likelihood - self._rng = np.random.default_rng(seed=420) + self._rng = np.random.default_rng(seed=random_state) # seed for sampling if likelihood == "quantile": self.quantiles, self._median_idx = self._prepare_quantiles(quantiles) diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py index ec380ee22b..5ea19983cd 100644 --- a/darts/models/forecasting/linear_regression_model.py +++ b/darts/models/forecasting/linear_regression_model.py @@ -8,6 +8,7 @@ from typing import List, Optional, Sequence, Tuple, Union import numpy as np +from scipy.optimize import linprog from sklearn.linear_model import LinearRegression, PoissonRegressor, QuantileRegressor from darts.logging import get_logger @@ -26,6 +27,7 @@ def __init__( output_chunk_length: int = 1, likelihood: str = None, quantiles: List[float] = None, + random_state: Optional[int] = None, **kwargs, ): """Linear regression model. @@ -49,12 +51,20 @@ def __init__( horizon `n` used in `predict()`. 
However, setting `output_chunk_length` equal to the forecast horizon may be useful if the covariates don't extend far enough into the future. likelihood - The objective used by the model. Currently, only `quantile` and 'poisson' are available. Allows sampling - from the model. + Can be set to `quantile` or 'poisson'. If set, the model will be probabilistic, allowing sampling at + prediction time. If set to `quantile`, the `sklearn.linear_model.QuantileRegressor` is used. Similarly, if + set to `poisson`, the `sklearn.linear_model.PoissonRegressor` is used. quantiles - If the `likelihood` is set to `quantile`, use these quantiles to samples from. + Fit the model to these quantiles if the `likelihood` is set to `quantile`. + random_state + Control the randomness of the sampling. Used as seed for + `numpy.random.default_rng`. Ignored when + no `likelihood` is set. + Default: ``None``. **kwargs - Additional keyword arguments passed to `sklearn.linear_model.LinearRegression` (by default), to `sklearn.linear_model.PoissonRegressor` (if `likelihood="poisson"`), or to `sklearn.linear_model.QuantileRegressor` (if `likelihood="quantile"`). + Additional keyword arguments passed to `sklearn.linear_model.LinearRegression` (by default), to + `sklearn.linear_model.PoissonRegressor` (if `likelihood="poisson"`), or to + `sklearn.linear_model.QuantileRegressor` (if `likelihood="quantile"`). """ self.kwargs = kwargs self._median_idx = None @@ -67,7 +77,7 @@ def __init__( available_likelihoods = ["quantile", "poisson"] # to be extended if likelihood is not None: self._check_likelihood(likelihood, available_likelihoods) - self._rng = np.random.default_rng(seed=420) + self._rng = np.random.default_rng(seed=random_state) if likelihood == "poisson": model = PoissonRegressor(**kwargs) @@ -126,37 +136,25 @@ def fit( if self.likelihood == "quantile": # empty model container in case of multiple calls to fit, e.g. 
when backtesting self._model_container.clear() + + # set solver for linear program + if "solver" not in self.kwargs: + # set default fast solver + self.kwargs["solver"] = "highs" + + # test solver availability with dummy problem + c = [1] + try: + linprog(c=c, method=self.kwargs["solver"]) + except ValueError as ve: + logger.warning( + f"{ve}. Upgrading scipy enables significantly faster solvers" + ) + # set solver to slow legacy + self.kwargs["solver"] = "interior-point" + for i, quantile in enumerate(self.quantiles): self.kwargs["quantile"] = quantile - if i == 0: - # check solver - if "solver" not in self.kwargs: - # set default fast solver - self.kwargs["solver"] = "highs" - try: - self.model = QuantileRegressor(**self.kwargs) - super().fit( - series=series, - past_covariates=past_covariates, - future_covariates=future_covariates, - max_samples_per_ts=max_samples_per_ts, - **kwargs, - ) - except ValueError: - logger.warning( - f"Solver {self.kwargs.get('solver')} is not available. Upgrade scipy" - f" to access faster solvers." - ) - # set to slow (legacy) solver - self.kwargs["solver"] = "interior-point" - self.model = QuantileRegressor(**self.kwargs) - super().fit( - series=series, - past_covariates=past_covariates, - future_covariates=future_covariates, - max_samples_per_ts=max_samples_per_ts, - **kwargs, - ) self.model = QuantileRegressor(**self.kwargs) super().fit( series=series, @@ -206,7 +204,7 @@ def predict( Optionally, the future-known covariates series needed as inputs for the model. They must match the covariates used for training in terms of dimension and type. num_samples : int, default: 1 - Currently this parameter is ignored for regression models. + Specifies the number of samples to obtain from the model. Should be set to 1 if no `likelihood` is specified. **kwargs : dict, optional Additional keyword arguments passed to the `predict` method of the model. Only works with univariate target series. 
@@ -237,6 +235,7 @@ def predict( # build timeseries from samples return self._ts_like(prediction, samples) - return super().predict( - n, series, past_covariates, future_covariates, num_samples, **kwargs - ) + else: + return super().predict( + n, series, past_covariates, future_covariates, num_samples, **kwargs + ) diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 4d8ef10cfe..37113a8778 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -36,6 +36,7 @@ from darts.logging import get_logger, raise_if, raise_if_not, raise_log from darts.models.forecasting.forecasting_model import GlobalForecastingModel from darts.timeseries import TimeSeries +from darts.utils.utils import _check_quantiles logger = get_logger(__name__) @@ -624,32 +625,23 @@ def _check_likelihood(likelihood, available_likelihoods): def _get_model_container(): return _QuantileModelContainer() - def _prepare_quantiles(self, quantiles): + @staticmethod + def _prepare_quantiles(quantiles): if quantiles is None: quantiles = [ 0.01, 0.05, 0.1, - 0.15, - 0.2, 0.25, - 0.3, - 0.4, - 0.45, 0.5, - 0.55, - 0.6, - 0.7, 0.75, - 0.8, - 0.85, 0.9, 0.95, 0.99, ] else: quantiles = sorted(quantiles) - self._check_quantiles(quantiles) + _check_quantiles(quantiles) median_idx = quantiles.index(0.5) return quantiles, median_idx @@ -691,29 +683,6 @@ def _ts_like(other: TimeSeries, data: np.ndarray) -> TimeSeries: new_xa = xr.DataArray(data, dims=other._xa.dims, coords=other._xa.coords) return TimeSeries(new_xa) - @staticmethod - def _check_quantiles(quantiles): - raise_if_not( - all([0 < q < 1 for q in quantiles]), - "All provided quantiles must be between 0 and 1.", - ) - - # we require the median to be present and the quantiles to be symmetric around it, - # for correctness of sampling. 
- median_q = 0.5 - raise_if_not( - median_q in quantiles, "median quantile `q=0.5` must be in `quantiles`" - ) - is_centered = [ - -1e-6 < (median_q - left_q) + (median_q - right_q) < 1e-6 - for left_q, right_q in zip(quantiles, quantiles[::-1]) - ] - raise_if_not( - all(is_centered), - "quantiles lower than `q=0.5` need to share same difference to `0.5` as quantiles " - "higher than `q=0.5`", - ) - class _QuantileModelContainer(OrderedDict): def __init__(self): diff --git a/darts/tests/models/forecasting/test_regression_models.py b/darts/tests/models/forecasting/test_regression_models.py index 41bc3663ec..1ec67b1e60 100644 --- a/darts/tests/models/forecasting/test_regression_models.py +++ b/darts/tests/models/forecasting/test_regression_models.py @@ -893,23 +893,36 @@ def test_gradient_boosted_model_with_eval_set(self, lgb_fit_patch): class ProbabilisticRegressionModelsTestCase(DartsBaseTestClass): models_cls_kwargs_errs = [ - (LightGBMModel, {"lags": 2, "likelihood": "quantile"}, 0.4), + ( + LightGBMModel, + {"lags": 2, "likelihood": "quantile", "random_state": 42}, + 0.4, + ), ( LightGBMModel, { "lags": 2, "likelihood": "quantile", "quantiles": [0.1, 0.3, 0.5, 0.7, 0.9], + "random_state": 42, }, 0.4, ), - (LightGBMModel, {"lags": 2, "likelihood": "poisson"}, 0.6), + ( + LightGBMModel, + {"lags": 2, "likelihood": "poisson", "random_state": 42}, + 0.6, + ), + ( + LinearRegressionModel, + {"lags": 2, "likelihood": "quantile", "random_state": 42}, + 0.6, + ), ( LinearRegressionModel, - {"lags": 2, "likelihood": "quantile"}, + {"lags": 2, "likelihood": "poisson", "random_state": 42}, 0.6, ), - (LinearRegressionModel, {"lags": 2, "likelihood": "poisson"}, 0.6), ] constant_ts = tg.constant_timeseries(length=200, value=0.5) @@ -932,10 +945,6 @@ def test_fit_predict_determinism(self): self.assertTrue((pred1 == pred2).all()) - # test whether the next prediction of the same model is different - pred3 = model.predict(n=10, num_samples=2).values() - self.assertTrue((pred2 != 
pred3).any()) - def test_probabilistic_forecast_accuracy(self): for model_cls, model_kwargs, err in self.models_cls_kwargs_errs: self.helper_test_probabilistic_forecast_accuracy( diff --git a/darts/utils/likelihood_models.py b/darts/utils/likelihood_models.py index 18b4590a15..ead3adc688 100644 --- a/darts/utils/likelihood_models.py +++ b/darts/utils/likelihood_models.py @@ -56,7 +56,7 @@ from torch.distributions.kl import kl_divergence # TODO: Table on README listing distribution, possible priors and wiki article -from darts.utils.utils import raise_if_not +from darts.utils.utils import _check_quantiles, raise_if_not MIN_CAUCHY_GAMMA_SAMPLING = 1e-100 @@ -993,7 +993,7 @@ def __init__(self, quantiles: Optional[List[float]] = None): ] else: self.quantiles = sorted(quantiles) - self._check_quantiles(self.quantiles) + _check_quantiles(self.quantiles) self._median_idx = self.quantiles.index(0.5) self.first = True self.quantiles_tensor = None @@ -1071,29 +1071,6 @@ def compute_loss(self, model_output: torch.Tensor, target: torch.Tensor): return losses.sum(dim=dim_q).mean() - @staticmethod - def _check_quantiles(quantiles): - raise_if_not( - all([0 < q < 1 for q in quantiles]), - "All provided quantiles must be between 0 and 1.", - ) - - # we require the median to be present and the quantiles to be symmetric around it, - # for correctness of sampling. 
- median_q = 0.5 - raise_if_not( - median_q in quantiles, "median quantile `q=0.5` must be in `quantiles`" - ) - is_centered = [ - -1e-6 < (median_q - left_q) + (median_q - right_q) < 1e-6 - for left_q, right_q in zip(quantiles, quantiles[::-1]) - ] - raise_if_not( - all(is_centered), - "quantiles lower than `q=0.5` need to share same difference to `0.5` as quantiles " - "higher than `q=0.5`", - ) - def _distr_from_params(self, params: Tuple) -> None: # This should not be called in this class (we are abusing Likelihood) return None diff --git a/darts/utils/utils.py b/darts/utils/utils.py index 745009cbb4..37107c7842 100644 --- a/darts/utils/utils.py +++ b/darts/utils/utils.py @@ -293,3 +293,26 @@ def _parallel_apply( delayed(fn)(*sample, *fn_args, **fn_kwargs) for sample in iterator ) return returned_data + + +def _check_quantiles(quantiles): + raise_if_not( + all([0 < q < 1 for q in quantiles]), + "All provided quantiles must be between 0 and 1.", + ) + + # we require the median to be present and the quantiles to be symmetric around it, + # for correctness of sampling. 
+ median_q = 0.5 + raise_if_not( + median_q in quantiles, "median quantile `q=0.5` must be in `quantiles`" + ) + is_centered = [ + -1e-6 < (median_q - left_q) + (median_q - right_q) < 1e-6 + for left_q, right_q in zip(quantiles, quantiles[::-1]) + ] + raise_if_not( + all(is_centered), + "quantiles lower than `q=0.5` need to share same difference to `0.5` as quantiles " + "higher than `q=0.5`", + ) From bc2db6c1b77bbbefe5ec7d5e64cb379fbc409b2e Mon Sep 17 00:00:00 2001 From: Gian Wiher Date: Thu, 10 Mar 2022 12:57:14 +0100 Subject: [PATCH 12/13] removed unnecessary enumerate --- darts/models/forecasting/linear_regression_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py index 5ea19983cd..4710fa703d 100644 --- a/darts/models/forecasting/linear_regression_model.py +++ b/darts/models/forecasting/linear_regression_model.py @@ -153,7 +153,7 @@ def fit( # set solver to slow legacy self.kwargs["solver"] = "interior-point" - for i, quantile in enumerate(self.quantiles): + for quantile in self.quantiles: self.kwargs["quantile"] = quantile self.model = QuantileRegressor(**self.kwargs) super().fit( From 2fe0fe1adc89d2d59080c935692114d23887dc29 Mon Sep 17 00:00:00 2001 From: Gian Wiher Date: Thu, 10 Mar 2022 15:00:00 +0100 Subject: [PATCH 13/13] inserted else statement for clarity --- .../forecasting/linear_regression_model.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/darts/models/forecasting/linear_regression_model.py b/darts/models/forecasting/linear_regression_model.py index 4710fa703d..fa2d8cba7c 100644 --- a/darts/models/forecasting/linear_regression_model.py +++ b/darts/models/forecasting/linear_regression_model.py @@ -168,15 +168,16 @@ def fit( return self - super().fit( - series=series, - past_covariates=past_covariates, - future_covariates=future_covariates, - max_samples_per_ts=max_samples_per_ts, - 
**kwargs, - ) + else: + super().fit( + series=series, + past_covariates=past_covariates, + future_covariates=future_covariates, + max_samples_per_ts=max_samples_per_ts, + **kwargs, + ) - return self + return self def predict( self,