diff --git a/CHANGELOG.md b/CHANGELOG.md index 76e7435b01..5f2e0b0f60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co - Improvements to `EnsembleModel`: - Model creation parameter `forecasting_models` now supports a mix of `LocalForecastingModel` and `GlobalForecastingModel` (single `TimeSeries` training/inference only, due to the local models). [#1745](https://github.com/unit8co/darts/pull/1745) by [Antoine Madrona](https://github.com/madtoinou). - Future and past covariates can now be used even if `forecasting_models` have different covariates support. The covariates passed to `fit()`/`predict()` are used only by models that support it. [#1745](https://github.com/unit8co/darts/pull/1745) by [Antoine Madrona](https://github.com/madtoinou). + - `RegressionEnsembleModel` and `NaiveEnsembleModel` can now generate probabilistic forecasts; probabilistic `forecasting_models` can be sampled to train the `regression_model`. Updated the documentation (stacking technique). [#1692](https://github.com/unit8co/darts/pull/1692) by [Antoine Madrona](https://github.com/madtoinou). - Improvements to `ShapExplainer`: - Added static covariates support to `ShapeExplainer`. [#1803](https://github.com/unit8co/darts/pull/#1803) by [Anne de Vries](https://github.com/anne-devries) and [Dennis Bader](https://github.com/dennisbader). 
diff --git a/darts/models/forecasting/baselines.py b/darts/models/forecasting/baselines.py index b99837de63..e358b9337c 100644 --- a/darts/models/forecasting/baselines.py +++ b/darts/models/forecasting/baselines.py @@ -12,7 +12,7 @@ from darts.logging import get_logger, raise_if_not from darts.models.forecasting.ensemble_model import EnsembleModel from darts.models.forecasting.forecasting_model import ( - GlobalForecastingModel, + ForecastingModel, LocalForecastingModel, ) from darts.timeseries import TimeSeries @@ -164,7 +164,7 @@ def predict(self, n: int, num_samples: int = 1, verbose: bool = False): class NaiveEnsembleModel(EnsembleModel): def __init__( self, - models: Union[List[LocalForecastingModel], List[GlobalForecastingModel]], + models: List[ForecastingModel], show_warnings: bool = True, ): """Naive combination model @@ -182,7 +182,12 @@ def __init__( show_warnings Whether to show warnings related to models covariates support. """ - super().__init__(models=models, show_warnings=show_warnings) + super().__init__( + models=models, + train_num_samples=None, + train_samples_reduction=None, + show_warnings=show_warnings, + ) def fit( self, @@ -209,11 +214,13 @@ def ensemble( self, predictions: Union[TimeSeries, Sequence[TimeSeries]], series: Optional[Sequence[TimeSeries]] = None, + num_samples: int = 1, ) -> Union[TimeSeries, Sequence[TimeSeries]]: def take_average(prediction: TimeSeries) -> TimeSeries: - series = prediction.pd_dataframe(copy=False).sum(axis=1) / len(self.models) - series.name = prediction.components[0] - return TimeSeries.from_series(series) + # average across the components, keep n_samples, rename components + return prediction.mean(axis=1).with_columns_renamed( + "components_mean", prediction.components[0] + ) if isinstance(predictions, Sequence): return [take_average(p) for p in predictions] diff --git a/darts/models/forecasting/ensemble_model.py b/darts/models/forecasting/ensemble_model.py index a69698c9e0..b48d2aa2fd 100644 --- 
a/darts/models/forecasting/ensemble_model.py +++ b/darts/models/forecasting/ensemble_model.py @@ -6,13 +6,14 @@ from functools import reduce from typing import List, Optional, Sequence, Tuple, Union -from darts.logging import get_logger, raise_if, raise_if_not +from darts.logging import get_logger, raise_if, raise_if_not, raise_log from darts.models.forecasting.forecasting_model import ( ForecastingModel, GlobalForecastingModel, LocalForecastingModel, ) from darts.timeseries import TimeSeries +from darts.utils.utils import series2seq logger = get_logger(__name__) @@ -30,11 +31,28 @@ class EnsembleModel(GlobalForecastingModel): ---------- models List of forecasting models whose predictions to ensemble + + .. note:: + if all the models are probabilistic, the `EnsembleModel` will also be probabilistic. + .. + train_num_samples + Number of prediction samples from each forecasting model for multi-level ensembles. The n_samples + dimension will be reduced using the `train_samples_reduction` method. + train_samples_reduction + If `models` are probabilistic and `train_num_samples` > 1, method used to + reduce the samples dimension to 1. Possible values: "mean", "median" or float value corresponding + to the desired quantile. show_warnings Whether to show warnings related to models covariates support. 
""" - def __init__(self, models: List[ForecastingModel], show_warnings: bool = True): + def __init__( + self, + models: List[ForecastingModel], + train_num_samples: int, + train_samples_reduction: Union[str, float], + show_warnings: bool = True, + ): raise_if_not( isinstance(models, list) and models, "Cannot instantiate EnsembleModel with an empty list of models", @@ -70,8 +88,44 @@ def __init__(self, models: List[ForecastingModel], show_warnings: bool = True): logger, ) + raise_if( + train_num_samples is not None + and train_num_samples > 1 + and all([not m._is_probabilistic() for m in models]), + "`train_num_samples` is greater than 1 but the `RegressionEnsembleModel` " + "contains only deterministic models.", + logger, + ) + + supported_reduction = ["mean", "median"] + if train_samples_reduction is None: + pass + elif isinstance(train_samples_reduction, float): + raise_if_not( + 0.0 < train_samples_reduction < 1.0, + f"if a float, `train_samples_reduction` must be between " + f"0 and 1, received ({train_samples_reduction})", + logger, + ) + elif isinstance(train_samples_reduction, str): + raise_if( + train_samples_reduction not in supported_reduction, + f"if a string, `train_samples_reduction` must be one of {supported_reduction}, " + f"received ({train_samples_reduction})", + logger, + ) + else: + raise_log( + f"`train_samples_reduction` type not supported " + f"({train_samples_reduction}). Must be `float` " + f" or one of {supported_reduction}.", + logger, + ) + super().__init__() self.models = models + self.train_num_samples = train_num_samples + self.train_samples_reduction = train_samples_reduction if show_warnings: if ( @@ -94,6 +148,7 @@ def __init__(self, models: List[ForecastingModel], show_warnings: bool = True): "To hide these warnings, set `show_warnings=False`." 
) + @abstractmethod def fit( self, series: Union[TimeSeries, Sequence[TimeSeries]], @@ -173,10 +228,21 @@ def _make_multiple_predictions( future_covariates=future_covariates if model.supports_future_covariates else None, - num_samples=num_samples, + num_samples=num_samples if model._is_probabilistic() else 1, ) for model in self.models ] + + # reduce the probabilistics series + if ( + self.train_samples_reduction is not None + and self.train_num_samples is not None + and self.train_num_samples > 1 + ): + predictions = [ + self._predictions_reduction(prediction) for prediction in predictions + ] + return ( self._stack_ts_seq(predictions) if is_single_series @@ -202,6 +268,12 @@ def predict( verbose=verbose, ) + # for multi-level models, forecasting models can generate arbitrary number of samples + if self.train_samples_reduction is None: + pred_num_samples = num_samples + else: + pred_num_samples = self.train_num_samples + self._verify_past_future_covariates(past_covariates, future_covariates) predictions = self._make_multiple_predictions( @@ -209,15 +281,17 @@ def predict( series=series, past_covariates=past_covariates, future_covariates=future_covariates, - num_samples=num_samples, + num_samples=pred_num_samples, ) - return self.ensemble(predictions, series=series) + + return self.ensemble(predictions, series=series, num_samples=num_samples) @abstractmethod def ensemble( self, predictions: Union[TimeSeries, Sequence[TimeSeries]], series: Optional[Sequence[TimeSeries]] = None, + num_samples: int = 1, ) -> Union[TimeSeries, Sequence[TimeSeries]]: """ Defines how to ensemble the individual models' predictions to produce a single prediction. 
@@ -237,6 +311,20 @@ def ensemble( """ pass + def _predictions_reduction(self, predictions: TimeSeries) -> TimeSeries: + """Reduce the sample dimension of the forecasting models predictions""" + is_single_series = isinstance(predictions, TimeSeries) + predictions = series2seq(predictions) + if self.train_samples_reduction == "median": + predictions = [pred.median(axis=2) for pred in predictions] + elif self.train_samples_reduction == "mean": + predictions = [pred.mean(axis=2) for pred in predictions] + else: + predictions = [ + pred.quantile(self.train_samples_reduction) for pred in predictions + ] + return predictions[0] if is_single_series else predictions + @property def min_train_series_length(self) -> int: return max(model.min_train_series_length for model in self.models) @@ -271,9 +359,12 @@ def find_max_lag_or_none(lag_id, aggregator) -> Optional[int]: find_max_lag_or_none(i, agg) for i, agg in enumerate(lag_aggregators) ) - def _is_probabilistic(self) -> bool: + def _models_are_probabilistic(self) -> bool: return all([model._is_probabilistic() for model in self.models]) + def _is_probabilistic(self) -> bool: + return self._models_are_probabilistic() + @property def supports_past_covariates(self) -> bool: return any([model.supports_past_covariates for model in self.models]) diff --git a/darts/models/forecasting/regression_ensemble_model.py b/darts/models/forecasting/regression_ensemble_model.py index a02c680c14..883dc1aaf7 100644 --- a/darts/models/forecasting/regression_ensemble_model.py +++ b/darts/models/forecasting/regression_ensemble_model.py @@ -23,10 +23,12 @@ def __init__( forecasting_models: List[ForecastingModel], regression_train_n_points: int, regression_model=None, + regression_train_num_samples: Optional[int] = 1, + regression_train_samples_reduction: Optional[Union[str, float]] = "median", show_warnings: bool = True, ): """ - Use a regression model for ensembling individual models' predictions. 
+ Use a regression model for ensembling individual models' predictions using the stacking technique [1]_. The provided regression model must implement ``fit()`` and ``predict()`` methods (e.g. scikit-learn regression models). Note that here the regression model is used to learn how to @@ -48,10 +50,35 @@ def __init__( regression_model Any regression model with ``predict()`` and ``fit()`` methods (e.g. from scikit-learn) Default: ``darts.model.LinearRegressionModel(fit_intercept=False)`` + + .. note:: + if `regression_model` is probabilistic, the `RegressionEnsembleModel` will also be probabilistic. + .. + regression_train_num_samples + Number of prediction samples from each forecasting model to train the regression model (samples are + reduced using `regression_train_samples_reduction`). Should be set to 1 for deterministic models. Default: 1. + + .. note:: + if `forecasting_models` contains a mix of probabilistic and deterministic models, + `regression_train_num_samples` will be passed only to the probabilistic ones. + .. + regression_train_samples_reduction + If `forecasting_models` are probabilistic and `regression_train_num_samples` > 1, method used to + reduce the samples before passing them to the regression model. Possible values: "mean", "median" + or float value corresponding to the desired quantile. Default: "median" show_warnings Whether to show warnings related to forecasting_models covariates support. + References + ---------- + .. [1] D. H. Wolpert, “Stacked generalization”, Neural Networks, vol. 5, no. 2, pp. 241–259, Jan. 
1992 """ - super().__init__(models=forecasting_models, show_warnings=show_warnings) + super().__init__( + models=forecasting_models, + train_num_samples=regression_train_num_samples, + train_samples_reduction=regression_train_samples_reduction, + show_warnings=show_warnings, + ) + if regression_model is None: regression_model = LinearRegressionModel( lags=None, lags_future_covariates=[0], fit_intercept=False @@ -104,7 +131,7 @@ def fit( raise_if( train_n_points_too_big, - "regression_train_n_points parameter too big (must be smaller or " + "`regression_train_n_points` parameter too big (must be smaller or " "equal to the number of points in training_series)", logger, ) @@ -134,7 +161,7 @@ def fit( series=forecast_training, past_covariates=past_covariates, future_covariates=future_covariates, - num_samples=1, + num_samples=self.train_num_samples, ) # train the regression model on the individual models' predictions @@ -160,6 +187,7 @@ def ensemble( self, predictions: Union[TimeSeries, Sequence[TimeSeries]], series: Optional[Sequence[TimeSeries]] = None, + num_samples: int = 1, ) -> Union[TimeSeries, Sequence[TimeSeries]]: is_single_series = isinstance(series, TimeSeries) or series is None @@ -168,7 +196,10 @@ def ensemble( ensembled = [ self.regression_model.predict( - n=len(prediction), series=serie, future_covariates=prediction + n=len(prediction), + series=serie, + future_covariates=prediction, + num_samples=num_samples, ) for serie, prediction in zip(series, predictions) ] @@ -187,3 +218,10 @@ def extreme_lags( ]: extreme_lags_ = super().extreme_lags return (extreme_lags_[0] - self.train_n_points,) + extreme_lags_[1:] + + def _is_probabilistic(self) -> bool: + """ + A RegressionEnsembleModel is probabilistic if its regression + model is probabilistic (ensembling layer) + """ + return self.regression_model._is_probabilistic() diff --git a/darts/tests/models/forecasting/test_ensemble_models.py b/darts/tests/models/forecasting/test_ensemble_models.py index 
4a3ca8b024..813c9d634f 100644 --- a/darts/tests/models/forecasting/test_ensemble_models.py +++ b/darts/tests/models/forecasting/test_ensemble_models.py @@ -11,7 +11,6 @@ NaiveDrift, NaiveEnsembleModel, NaiveSeasonal, - RegressionEnsembleModel, StatsForecastAutoARIMA, Theta, ) @@ -125,17 +124,52 @@ def test_predict_ensemble_local_models(self): np.array_equal(forecast_naive_ensemble.values(), forecast_mean.values()) ) - def test_stochastic_ensemble(self): - model1 = LinearRegressionModel(lags=1, likelihood="quantile") - model2 = LinearRegressionModel(lags=2, likelihood="quantile") + def test_stochastic_naive_ensemble(self): + num_samples = 100 - naive_ensemble = NaiveEnsembleModel([model1, model2]) - self.assertTrue(naive_ensemble._is_probabilistic()) + # probabilistic forecasting models + model_proba_1 = LinearRegressionModel( + lags=1, likelihood="quantile", random_state=42 + ) + model_proba_2 = LinearRegressionModel( + lags=2, likelihood="quantile", random_state=42 + ) + + # only probabilistic forecasting models + naive_ensemble_proba = NaiveEnsembleModel([model_proba_1, model_proba_2]) + self.assertTrue(naive_ensemble_proba._is_probabilistic()) + + naive_ensemble_proba.fit(self.series1 + self.series2) + # by default, only 1 sample + pred_proba_1_sample = naive_ensemble_proba.predict(n=5) + self.assertEqual(pred_proba_1_sample.n_samples, 1) + + # possible to obtain probabilistic forecast by averaging samples across the models + pred_proba_many_sample = naive_ensemble_proba.predict( + n=5, num_samples=num_samples + ) + self.assertEqual(pred_proba_many_sample.n_samples, num_samples) - regression_ensemble = RegressionEnsembleModel( - [model1, model2], regression_train_n_points=1 + # need to redefine the models to reset the random state + model_alone_1 = LinearRegressionModel( + lags=1, likelihood="quantile", random_state=42 + ) + model_alone_2 = LinearRegressionModel( + lags=2, likelihood="quantile", random_state=42 + ) + model_alone_1.fit(self.series1 + 
self.series2) + model_alone_2.fit(self.series1 + self.series2) + forecast_mean = 0.5 * model_alone_1.predict( + 5, num_samples=num_samples + ) + 0.5 * model_alone_2.predict(5, num_samples=num_samples) + + self.assertEqual( + forecast_mean.values().shape, pred_proba_many_sample.values().shape + ) + self.assertEqual(forecast_mean.n_samples, pred_proba_many_sample.n_samples) + self.assertTrue( + np.array_equal(pred_proba_many_sample.values(), forecast_mean.values()) ) - self.assertTrue(regression_ensemble._is_probabilistic()) @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def test_input_models_global_models(self): @@ -261,7 +295,7 @@ def test_predict_with_target(self): series_short = series_long[:25] # train with a single series - ensemble_model = self.get_global_ensembe_model() + ensemble_model = self.get_global_ensemble_model() ensemble_model.fit(series_short, past_covariates=series_long) # predict after end of train series preds = ensemble_model.predict(n=5, past_covariates=series_long) @@ -283,7 +317,7 @@ def test_predict_with_target(self): self.assertTrue(isinstance(preds, list) and len(preds) == 1) # train with multiple series - ensemble_model = self.get_global_ensembe_model() + ensemble_model = self.get_global_ensemble_model() ensemble_model.fit([series_short] * 2, past_covariates=[series_long] * 2) with self.assertRaises(ValueError): # predict without passing series should raise an error @@ -305,7 +339,7 @@ def test_predict_with_target(self): self.assertTrue(isinstance(preds, list) and len(preds) == 1) @staticmethod - def get_global_ensembe_model(output_chunk_length=5): + def get_global_ensemble_model(output_chunk_length=5): lags = [-1, -2, -5] return NaiveEnsembleModel( models=[ diff --git a/darts/tests/models/forecasting/test_regression_ensemble_model.py b/darts/tests/models/forecasting/test_regression_ensemble_model.py index 31d860c950..2537bf5a6c 100644 --- a/darts/tests/models/forecasting/test_regression_ensemble_model.py +++ 
b/darts/tests/models/forecasting/test_regression_ensemble_model.py @@ -363,3 +363,247 @@ def test_call_backtest_regression_ensemble_local_models(self): # -10 comes from the maximum minimum train series length of all models assert ensemble.extreme_lags == (-10 - regr_train_n, 0, None, None, None, None) ensemble.backtest(series) + + def test_stochastic_regression_ensemble_model(self): + quantiles = [0.25, 0.5, 0.75] + + # probabilistic ensembling model + linreg_prob = LinearRegressionModel( + quantiles=quantiles, lags_future_covariates=[0], likelihood="quantile" + ) + + # deterministic ensembling model + linreg_dete = LinearRegressionModel(lags_future_covariates=[0]) + + # every models are probabilistic + ensemble_allproba = RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_probabilistic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=10, + regression_model=linreg_prob.untrained_model(), + ) + + self.assertTrue(ensemble_allproba._models_are_probabilistic()) + self.assertTrue(ensemble_allproba._is_probabilistic()) + ensemble_allproba.fit(self.ts_random_walk[:100]) + # probabilistic forecasting is supported + pred = ensemble_allproba.predict(5, num_samples=10) + self.assertEqual(pred.n_samples, 10) + + # forecasting models are a mix of probabilistic and deterministic, probabilistic regressor + ensemble_mixproba = RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_deterministic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=10, + regression_model=linreg_prob.untrained_model(), + ) + + self.assertFalse(ensemble_mixproba._models_are_probabilistic()) + self.assertTrue(ensemble_mixproba._is_probabilistic()) + ensemble_mixproba.fit(self.ts_random_walk[:100]) + # probabilistic forecasting is supported + pred = ensemble_mixproba.predict(5, num_samples=10) + self.assertEqual(pred.n_samples, 10) + + # 
forecasting models are a mix of probabilistic and deterministic, probabilistic regressor + # with regression_train_num_samples > 1 + ensemble_mixproba2 = RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_deterministic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=10, + regression_model=linreg_prob.untrained_model(), + regression_train_num_samples=100, + regression_train_samples_reduction="median", + ) + + self.assertFalse(ensemble_mixproba2._models_are_probabilistic()) + self.assertTrue(ensemble_mixproba2._is_probabilistic()) + ensemble_mixproba2.fit(self.ts_random_walk[:100]) + pred = ensemble_mixproba2.predict(5, num_samples=10) + self.assertEqual(pred.n_samples, 10) + + # only regression model is probabilistic + ensemble_proba_reg = RegressionEnsembleModel( + forecasting_models=[ + self.get_deterministic_global_model([-1, -3]), + self.get_deterministic_global_model([-2, -4]), + ], + regression_train_n_points=10, + regression_model=linreg_prob.untrained_model(), + ) + + self.assertFalse(ensemble_proba_reg._models_are_probabilistic()) + self.assertTrue(ensemble_proba_reg._is_probabilistic()) + ensemble_proba_reg.fit(self.ts_random_walk[:100]) + # probabilistic forecasting is supported + pred = ensemble_proba_reg.predict(5, num_samples=10) + self.assertEqual(pred.n_samples, 10) + + # every models but regression model are probabilistics + ensemble_dete_reg = RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_probabilistic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=10, + regression_model=linreg_dete.untrained_model(), + ) + + self.assertTrue(ensemble_dete_reg._models_are_probabilistic()) + self.assertFalse(ensemble_dete_reg._is_probabilistic()) + ensemble_dete_reg.fit(self.ts_random_walk[:100]) + # deterministic forecasting is supported + ensemble_dete_reg.predict(5, num_samples=1) + # 
probabilistic forecasting is not supported + with self.assertRaises(ValueError): + ensemble_dete_reg.predict(5, num_samples=10) + + # every models are deterministic + ensemble_alldete = RegressionEnsembleModel( + forecasting_models=[ + self.get_deterministic_global_model([-1, -3]), + self.get_deterministic_global_model([-2, -4]), + ], + regression_train_n_points=10, + regression_model=linreg_dete.untrained_model(), + ) + + self.assertFalse(ensemble_alldete._models_are_probabilistic()) + self.assertFalse(ensemble_alldete._is_probabilistic()) + ensemble_alldete.fit(self.ts_random_walk[:100]) + # deterministic forecasting is supported + ensemble_alldete.predict(5, num_samples=1) + # probabilistic forecasting is not supported + with self.assertRaises(ValueError): + ensemble_alldete.predict(5, num_samples=10) + + # deterministic forecasters cannot be sampled + with self.assertRaises(ValueError): + RegressionEnsembleModel( + forecasting_models=[ + self.get_deterministic_global_model([-1, -3]), + self.get_deterministic_global_model([-2, -4]), + ], + regression_train_n_points=10, + regression_model=linreg_prob.untrained_model(), + regression_train_num_samples=10, + ) + + def test_stochastic_training_regression_ensemble_model(self): + """ + regression model is deterministic (default) but the forecasting models are + probabilistic and they can be sampled to train the regression model. 
+ """ + quantiles = [0.25, 0.5, 0.75] + + # cannot sample deterministic forecasting models + with self.assertRaises(ValueError): + RegressionEnsembleModel( + forecasting_models=[ + self.get_deterministic_global_model([-1, -3]), + self.get_deterministic_global_model([-2, -4]), + ], + regression_train_n_points=50, + regression_train_num_samples=500, + ) + + # must use an appropriate reduction method + with self.assertRaises(ValueError): + RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_probabilistic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=50, + regression_train_num_samples=500, + regression_train_samples_reduction="wrong", + ) + + # samples are reduced with the requested method ("mean" here) + ensemble_model_mean = RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_probabilistic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=50, + regression_train_num_samples=500, + regression_train_samples_reduction="mean", + ) + + ensemble_model_median = RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_probabilistic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=50, + regression_train_num_samples=500, + ) + self.assertEqual(ensemble_model_median.train_samples_reduction, "median") + + ensemble_model_0_5_quantile = RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_probabilistic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=50, + regression_train_num_samples=500, + regression_train_samples_reduction=0.5, + ) + + train, val = self.ts_sum1.split_after(0.9) + ensemble_model_mean.fit(train) + ensemble_model_median.fit(train) + ensemble_model_0_5_quantile.fit(train) + + pred_mean_training = 
ensemble_model_mean.predict(len(val)) + pred_median_training = ensemble_model_median.predict(len(val)) + pred_0_5_qt_training = ensemble_model_0_5_quantile.predict(len(val)) + + self.assertEqual(pred_median_training, pred_0_5_qt_training) + self.assertEqual( + pred_mean_training.all_values().shape, + pred_median_training.all_values().shape, + ) + + # deterministic regression model -> deterministic ensemble + with self.assertRaises(ValueError): + ensemble_model_mean.predict(len(val), num_samples=100) + with self.assertRaises(ValueError): + ensemble_model_median.predict(len(val), num_samples=100) + with self.assertRaises(ValueError): + ensemble_model_0_5_quantile.predict(len(val), num_samples=100) + + # possible to use very small regression_train_num_samples + ensemble_model_mean_1_sample = RegressionEnsembleModel( + forecasting_models=[ + self.get_probabilistic_global_model([-1, -3], quantiles), + self.get_probabilistic_global_model([-2, -4], quantiles), + ], + regression_train_n_points=50, + regression_train_num_samples=1, + ) + ensemble_model_mean_1_sample.fit(train) + ensemble_model_mean_1_sample.predict(len(val)) + + # multi-series support + ensemble_model_median.fit([train, train + 100]) + ensemble_model_mean.predict(len(val), series=train) + + @staticmethod + def get_probabilistic_global_model(lags, quantiles, random_state=42): + return LinearRegressionModel( + lags=lags, + quantiles=quantiles, + likelihood="quantile", + random_state=random_state, + ) + + @staticmethod + def get_deterministic_global_model(lags, random_state=13): + return LinearRegressionModel(lags=lags, random_state=random_state) diff --git a/examples/00-quickstart.ipynb b/examples/00-quickstart.ipynb index 01c879308d..bba9b1dd79 100644 --- a/examples/00-quickstart.ipynb +++ b/examples/00-quickstart.ipynb @@ -2767,6 +2767,87 @@ }, { "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "By using a probabilistic regression model, the `RegressionEnsembleModel` can also 
generate probabilistic forecasts:" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8ca7a08507a741f1bb55d4c2136fb1ba", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/57 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from darts.models import LinearRegressionModel\n", + "\n", + "quantiles = [0.25, 0.5, 0.75]\n", + "\n", + "models = [NaiveDrift(), NaiveSeasonal(12)]\n", + "\n", + "regression_model = LinearRegressionModel(\n", + " quantiles=quantiles,\n", + " lags_future_covariates=[0],\n", + " likelihood=\"quantile\",\n", + " fit_intercept=False,\n", + ")\n", + "\n", + "ensemble_model = RegressionEnsembleModel(\n", + " forecasting_models=models,\n", + " regression_train_n_points=12,\n", + " regression_model=regression_model,\n", + ")\n", + "\n", + "backtest = ensemble_model.historical_forecasts(\n", + " series_air, start=0.6, forecast_horizon=3, num_samples=500, verbose=True\n", + ")\n", + "\n", + "print(\"MAPE = %.2f\" % (mape(backtest, series_air)))\n", + "series_air.plot()\n", + "backtest.plot()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "`RegressionEnsembleModel` uses the *stacking* technique to train and combine the `forecasting_models`: each one of them is trained independently and the `regression_model` is then trained using their predictions as `future_covariates`." + ] + }, + { "cell_type": "markdown", "metadata": {}, "source": [