unit8co
diff --git a/‎CHANGELOG.md
+1 b/‎CHANGELOG.md
+1
diff --git a/‎darts/models/forecasting/baselines.py
+13-6 b/‎darts/models/forecasting/baselines.py
+13-6
diff --git a/‎darts/models/forecasting/ensemble_model.py
+97-6 b/‎darts/models/forecasting/ensemble_model.py
+97-6
diff --git a/‎darts/models/forecasting/regression_ensemble_model.py
+43-5 b/‎darts/models/forecasting/regression_ensemble_model.py
+43-5
@@ -17,6 +17,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
 - Improvements to `EnsembleModel`:
   - Model creation parameter `forecasting_models` now supports a mix of `LocalForecastingModel` and `GlobalForecastingModel` (single `TimeSeries` training/inference only, due to the local models). [#1745](https://github.com/unit8co/darts/pull/1745) by [Antoine Madrona](https://github.com/madtoinou).
   - Future and past covariates can now be used even if `forecasting_models` have different covariates support. The covariates passed to `fit()`/`predict()` are used only by models that support it. [#1745](https://github.com/unit8co/darts/pull/1745) by [Antoine Madrona](https://github.com/madtoinou).
+  - `RegressionEnsembleModel` and `NaiveEnsembleModel` can generate probabilistic forecasts; probabilistic `forecasting_models` can be sampled to train the `regression_model`. Updated the documentation (stacking technique). [#1692](https://github.com/unit8co/darts/pull/1692) by [Antoine Madrona](https://github.com/madtoinou).
 - Improvements to `ShapExplainer`:
   - Added static covariates support to `ShapeExplainer`. [#1803](https://github.com/unit8co/darts/pull/#1803) by [Anne de Vries](https://github.com/anne-devries) and [Dennis Bader](https://github.com/dennisbader).
 
 
@@ -12,7 +12,7 @@
 from darts.logging import get_logger, raise_if_not
 from darts.models.forecasting.ensemble_model import EnsembleModel
 from darts.models.forecasting.forecasting_model import (
-    GlobalForecastingModel,
+    ForecastingModel,
     LocalForecastingModel,
 )
 from darts.timeseries import TimeSeries
@@ -164,7 +164,7 @@ def predict(self, n: int, num_samples: int = 1, verbose: bool = False):
 class NaiveEnsembleModel(EnsembleModel):
     def __init__(
         self,
-        models: Union[List[LocalForecastingModel], List[GlobalForecastingModel]],
+        models: List[ForecastingModel],
         show_warnings: bool = True,
     ):
         """Naive combination model
@@ -182,7 +182,12 @@ def __init__(
         show_warnings
             Whether to show warnings related to models covariates support.
         """
-        super().__init__(models=models, show_warnings=show_warnings)
+        super().__init__(
+            models=models,
+            train_num_samples=None,
+            train_samples_reduction=None,
+            show_warnings=show_warnings,
+        )
 
     def fit(
         self,
@@ -209,11 +214,13 @@ def ensemble(
         self,
         predictions: Union[TimeSeries, Sequence[TimeSeries]],
         series: Optional[Sequence[TimeSeries]] = None,
+        num_samples: int = 1,
     ) -> Union[TimeSeries, Sequence[TimeSeries]]:
         def take_average(prediction: TimeSeries) -> TimeSeries:
-            series = prediction.pd_dataframe(copy=False).sum(axis=1) / len(self.models)
-            series.name = prediction.components[0]
-            return TimeSeries.from_series(series)
+            # average across the components, keep n_samples, rename components
+            return prediction.mean(axis=1).with_columns_renamed(
+                "components_mean", prediction.components[0]
+            )
 
         if isinstance(predictions, Sequence):
             return [take_average(p) for p in predictions]
 
@@ -6,13 +6,14 @@
 from functools import reduce
 from typing import List, Optional, Sequence, Tuple, Union
 
-from darts.logging import get_logger, raise_if, raise_if_not
+from darts.logging import get_logger, raise_if, raise_if_not, raise_log
 from darts.models.forecasting.forecasting_model import (
     ForecastingModel,
     GlobalForecastingModel,
     LocalForecastingModel,
 )
 from darts.timeseries import TimeSeries
+from darts.utils.utils import series2seq
 
 logger = get_logger(__name__)
 
@@ -30,11 +31,28 @@ class EnsembleModel(GlobalForecastingModel):
     ----------
     models
         List of forecasting models whose predictions to ensemble
+
+        .. note::
+                if all the models are probabilistic, the `EnsembleModel` will also be probabilistic.
+        ..
+    train_num_samples
+        Number of prediction samples from each forecasting model for multi-level ensembles. The n_samples
+        dimension will be reduced using the `train_samples_reduction` method.
+    train_samples_reduction
+        If `models` are probabilistic and `train_num_samples` > 1, method used to
+        reduce the samples dimension to 1. Possible values: "mean", "median" or float value corresponding
+        to the desired quantile.
     show_warnings
         Whether to show warnings related to models covariates support.
     """
 
-    def __init__(self, models: List[ForecastingModel], show_warnings: bool = True):
+    def __init__(
+        self,
+        models: List[ForecastingModel],
+        train_num_samples: int,
+        train_samples_reduction: Union[str, float],
+        show_warnings: bool = True,
+    ):
         raise_if_not(
             isinstance(models, list) and models,
             "Cannot instantiate EnsembleModel with an empty list of models",
@@ -70,8 +88,44 @@ def __init__(self, models: List[ForecastingModel], show_warnings: bool = True):
             logger,
         )
 
+        raise_if(
+            train_num_samples is not None
+            and train_num_samples > 1
+            and all([not m._is_probabilistic() for m in models]),
+            "`train_num_samples` is greater than 1 but the `RegressionEnsembleModel` "
+            "contains only deterministic models.",
+            logger,
+        )
+
+        supported_reduction = ["mean", "median"]
+        if train_samples_reduction is None:
+            pass
+        elif isinstance(train_samples_reduction, float):
+            raise_if_not(
+                0.0 < train_samples_reduction < 1.0,
+                f"if a float, `train_samples_reduction` must be between "
+                f"0 and 1, received ({train_samples_reduction})",
+                logger,
+            )
+        elif isinstance(train_samples_reduction, str):
+            raise_if(
+                train_samples_reduction not in supported_reduction,
+                f"if a string, `train_samples_reduction` must be one of {supported_reduction}, "
+                f"received ({train_samples_reduction})",
+                logger,
+            )
+        else:
+            raise_log(
+                f"`train_samples_reduction` type not supported "
+                f"({train_samples_reduction}). Must be `float` "
+                f" or one of {supported_reduction}.",
+                logger,
+            )
+
         super().__init__()
         self.models = models
+        self.train_num_samples = train_num_samples
+        self.train_samples_reduction = train_samples_reduction
 
         if show_warnings:
             if (
@@ -94,6 +148,7 @@ def __init__(self, models: List[ForecastingModel], show_warnings: bool = True):
                     "To hide these warnings, set `show_warnings=False`."
                 )
 
+    @abstractmethod
     def fit(
         self,
         series: Union[TimeSeries, Sequence[TimeSeries]],
@@ -173,10 +228,21 @@ def _make_multiple_predictions(
                 future_covariates=future_covariates
                 if model.supports_future_covariates
                 else None,
-                num_samples=num_samples,
+                num_samples=num_samples if model._is_probabilistic() else 1,
             )
             for model in self.models
         ]
+
+        # reduce the sample dimension of the probabilistic series
+        if (
+            self.train_samples_reduction is not None
+            and self.train_num_samples is not None
+            and self.train_num_samples > 1
+        ):
+            predictions = [
+                self._predictions_reduction(prediction) for prediction in predictions
+            ]
+
         return (
             self._stack_ts_seq(predictions)
             if is_single_series
@@ -202,22 +268,30 @@ def predict(
             verbose=verbose,
         )
 
+        # for multi-level models, forecasting models can generate arbitrary number of samples
+        if self.train_samples_reduction is None:
+            pred_num_samples = num_samples
+        else:
+            pred_num_samples = self.train_num_samples
+
         self._verify_past_future_covariates(past_covariates, future_covariates)
 
         predictions = self._make_multiple_predictions(
             n=n,
             series=series,
             past_covariates=past_covariates,
             future_covariates=future_covariates,
-            num_samples=num_samples,
+            num_samples=pred_num_samples,
         )
-        return self.ensemble(predictions, series=series)
+
+        return self.ensemble(predictions, series=series, num_samples=num_samples)
 
     @abstractmethod
     def ensemble(
         self,
         predictions: Union[TimeSeries, Sequence[TimeSeries]],
         series: Optional[Sequence[TimeSeries]] = None,
+        num_samples: int = 1,
     ) -> Union[TimeSeries, Sequence[TimeSeries]]:
         """
         Defines how to ensemble the individual models' predictions to produce a single prediction.
@@ -237,6 +311,20 @@ def ensemble(
         """
         pass
 
+    def _predictions_reduction(self, predictions: TimeSeries) -> TimeSeries:
+        """Reduce the sample dimension of the forecasting models predictions"""
+        is_single_series = isinstance(predictions, TimeSeries)
+        predictions = series2seq(predictions)
+        if self.train_samples_reduction == "median":
+            predictions = [pred.median(axis=2) for pred in predictions]
+        elif self.train_samples_reduction == "mean":
+            predictions = [pred.mean(axis=2) for pred in predictions]
+        else:
+            predictions = [
+                pred.quantile(self.train_samples_reduction) for pred in predictions
+            ]
+        return predictions[0] if is_single_series else predictions
+
     @property
     def min_train_series_length(self) -> int:
         return max(model.min_train_series_length for model in self.models)
@@ -271,9 +359,12 @@ def find_max_lag_or_none(lag_id, aggregator) -> Optional[int]:
             find_max_lag_or_none(i, agg) for i, agg in enumerate(lag_aggregators)
         )
 
-    def _is_probabilistic(self) -> bool:
+    def _models_are_probabilistic(self) -> bool:
         return all([model._is_probabilistic() for model in self.models])
 
+    def _is_probabilistic(self) -> bool:
+        return self._models_are_probabilistic()
+
     @property
     def supports_past_covariates(self) -> bool:
         return any([model.supports_past_covariates for model in self.models])
 
@@ -23,10 +23,12 @@ def __init__(
         forecasting_models: List[ForecastingModel],
         regression_train_n_points: int,
         regression_model=None,
+        regression_train_num_samples: Optional[int] = 1,
+        regression_train_samples_reduction: Optional[Union[str, float]] = "median",
         show_warnings: bool = True,
     ):
         """
-        Use a regression model for ensembling individual models' predictions.
+        Use a regression model for ensembling individual models' predictions using the stacking technique [1]_.
 
         The provided regression model must implement ``fit()`` and ``predict()`` methods
         (e.g. scikit-learn regression models). Note that here the regression model is used to learn how to
@@ -48,10 +50,35 @@ def __init__(
         regression_model
             Any regression model with ``predict()`` and ``fit()`` methods (e.g. from scikit-learn)
             Default: ``darts.model.LinearRegressionModel(fit_intercept=False)``
+
+            .. note::
+                if `regression_model` is probabilistic, the `RegressionEnsembleModel` will also be probabilistic.
+            ..
+        regression_train_num_samples
+            Number of samples predicted by each forecasting model to train the regression model (reduced with
+            `regression_train_samples_reduction`). Should be set to 1 for deterministic models. Default: 1.
+
+            .. note::
+                if `forecasting_models` contains a mix of probabilistic and deterministic models,
+                `regression_train_num_samples` will be passed only to the probabilistic ones.
+            ..
+        regression_train_samples_reduction
+            If `forecasting_models` are probabilistic and `regression_train_num_samples` > 1, method used to
+            reduce the samples before passing them to the regression model. Possible values: "mean", "median"
+            or float value corresponding to the desired quantile. Default: "median"
         show_warnings
             Whether to show warnings related to forecasting_models covariates support.
+        References
+        ----------
+        .. [1] D. H. Wolpert, “Stacked generalization”, Neural Networks, vol. 5, no. 2, pp. 241–259, Jan. 1992
         """
-        super().__init__(models=forecasting_models, show_warnings=show_warnings)
+        super().__init__(
+            models=forecasting_models,
+            train_num_samples=regression_train_num_samples,
+            train_samples_reduction=regression_train_samples_reduction,
+            show_warnings=show_warnings,
+        )
+
         if regression_model is None:
             regression_model = LinearRegressionModel(
                 lags=None, lags_future_covariates=[0], fit_intercept=False
@@ -104,7 +131,7 @@ def fit(
 
         raise_if(
             train_n_points_too_big,
-            "regression_train_n_points parameter too big (must be smaller or "
+            "`regression_train_n_points` parameter too big (must be smaller or "
             "equal to the number of points in training_series)",
             logger,
         )
@@ -134,7 +161,7 @@ def fit(
             series=forecast_training,
             past_covariates=past_covariates,
             future_covariates=future_covariates,
-            num_samples=1,
+            num_samples=self.train_num_samples,
         )
 
         # train the regression model on the individual models' predictions
@@ -160,6 +187,7 @@ def ensemble(
         self,
         predictions: Union[TimeSeries, Sequence[TimeSeries]],
         series: Optional[Sequence[TimeSeries]] = None,
+        num_samples: int = 1,
     ) -> Union[TimeSeries, Sequence[TimeSeries]]:
 
         is_single_series = isinstance(series, TimeSeries) or series is None
@@ -168,7 +196,10 @@ def ensemble(
 
         ensembled = [
             self.regression_model.predict(
-                n=len(prediction), series=serie, future_covariates=prediction
+                n=len(prediction),
+                series=serie,
+                future_covariates=prediction,
+                num_samples=num_samples,
             )
             for serie, prediction in zip(series, predictions)
         ]
@@ -187,3 +218,10 @@ def extreme_lags(
     ]:
         extreme_lags_ = super().extreme_lags
         return (extreme_lags_[0] - self.train_n_points,) + extreme_lags_[1:]
+
+    def _is_probabilistic(self) -> bool:
+        """
+        A RegressionEnsembleModel is probabilistic if its regression
+        model is probabilistic (ensembling layer)
+        """
+        return self.regression_model._is_probabilistic()