add already existing forecasts param #1597

Merged
50 changes: 29 additions & 21 deletions darts/models/forecasting/forecasting_model.py
@@ -922,6 +922,7 @@ def historical_forecasts(
def backtest(
self,
series: Union[TimeSeries, Sequence[TimeSeries]],
historical_forecasts: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
num_samples: int = 1,
@@ -942,12 +943,12 @@ def backtest(
"""Compute error values that the model would have produced when
used on (potentially multiple) `series`.

It repeatedly builds a training set from the beginning of `series`. It trains the
current model on the training set, emits a forecast of length equal to forecast_horizon, and then moves
the end of the
training set forward by `stride` time steps. A metric (given by the `metric` function) is then evaluated
on the forecast and the actual values. Finally, the method returns a `reduction` (the mean by default)
of all these metric scores.
If `historical_forecasts` are provided, the metric (given by the `metric` function) is evaluated directly on
the forecast and the actual values. Otherwise, it repeatedly builds a training set from the beginning of
`series`. It trains the current model on the training set, emits a forecast of length equal to
`forecast_horizon`, and then moves the end of the training set forward by `stride` time steps. The metric is
then evaluated on the forecast and the actual values. Finally, the method returns a `reduction` (the mean by
default) of all these metric scores.

By default, this method uses each historical forecast (whole) to compute error scores.
If `last_points_only` is set to True, it will use only the last point of each historical
@@ -964,6 +965,11 @@
----------
series
The (or a sequence of) target time series to use to successively train and evaluate the historical forecasts
historical_forecasts
Optionally, the (or a sequence of) historical forecasts time series to be evaluated. Corresponds to
the output of :meth:`historical_forecasts() <ForecastingModel.historical_forecasts>`. If provided, will
skip historical forecasting and ignore parameters `num_samples`, `train_length`, `start`,
`forecast_horizon`, `stride`, `retrain`, `overlap_end`, and `last_points_only`.
past_covariates
Optionally, one (or a sequence of) past-observed covariate series.
This applies only if the model supports past covariates.
@@ -1035,21 +1041,23 @@ def backtest(
The (sequence of) error score on a series, or list of list containing error scores for each
provided series and each sample.
"""

forecasts = self.historical_forecasts(
series=series,
past_covariates=past_covariates,
future_covariates=future_covariates,
num_samples=num_samples,
train_length=train_length,
start=start,
forecast_horizon=forecast_horizon,
stride=stride,
retrain=retrain,
overlap_end=overlap_end,
last_points_only=last_points_only,
verbose=verbose,
)
if historical_forecasts is None:
forecasts = self.historical_forecasts(
series=series,
past_covariates=past_covariates,
future_covariates=future_covariates,
num_samples=num_samples,
train_length=train_length,
start=start,
forecast_horizon=forecast_horizon,
stride=stride,
retrain=retrain,
overlap_end=overlap_end,
last_points_only=last_points_only,
verbose=verbose,
)
else:
forecasts = historical_forecasts

series = series2seq(series)
if len(series) == 1:
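The dispatch this hunk introduces — skip the expensive retrain-and-forecast loop whenever precomputed forecasts are passed in — can be sketched outside of darts. The following is a minimal, hypothetical stand-in (the `make_forecasts`/`backtest` helpers and the naive persistence model are illustrative toys, not the library's API); it only mirrors the `if historical_forecasts is None:` branching shown above.

```python
from typing import Callable, List, Optional


def make_forecasts(series: List[float], horizon: int, stride: int) -> List[List[float]]:
    """Toy stand-in for historical_forecasts(): naive 'repeat last value' forecasts."""
    forecasts = []
    for end in range(1, len(series) - horizon + 1, stride):
        forecasts.append([series[end - 1]] * horizon)  # persistence forecast
    return forecasts


def backtest(
    series: List[float],
    historical_forecasts: Optional[List[List[float]]] = None,
    horizon: int = 1,
    stride: int = 1,
    metric: Callable[[List[float], List[float]], float] = None,
) -> float:
    # Mirrors the change: if precomputed forecasts are given, skip forecasting entirely.
    if historical_forecasts is None:
        historical_forecasts = make_forecasts(series, horizon, stride)
    scores = []
    for i, fc in enumerate(historical_forecasts):
        start = 1 + i * stride
        actual = series[start : start + horizon]
        scores.append(metric(actual, fc))
    return sum(scores) / len(scores)  # 'mean' reduction, as in the default


mae = lambda a, f: sum(abs(x - y) for x, y in zip(a, f)) / len(a)
series = [1.0, 2.0, 3.0, 4.0, 5.0]
cached = make_forecasts(series, horizon=1, stride=1)
# Both calls yield the same score; the second reuses the cached forecasts.
print(backtest(series, horizon=1, metric=mae))                                # 1.0
print(backtest(series, historical_forecasts=cached, horizon=1, metric=mae))  # 1.0
```

The design point carried over from the PR: the caller can amortize one forecasting pass across many `backtest` calls, since the scoring branch never touches the model.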
16 changes: 16 additions & 0 deletions darts/tests/models/forecasting/test_backtesting.py
@@ -116,6 +116,22 @@ def test_backtest_forecasting(self):
)
self.assertEqual(score, 1.0)

# univariate model + univariate series + historical_forecasts precalculated
forecasts = NaiveDrift().historical_forecasts(
linear_series,
start=pd.Timestamp("20000201"),
forecast_horizon=3,
last_points_only=False,
)
precalculated_forecasts_score = NaiveDrift().backtest(
linear_series,
historical_forecasts=forecasts,
start=pd.Timestamp("20000201"),
forecast_horizon=3,
metric=r2_score,
)
self.assertEqual(score, precalculated_forecasts_score)

# very large train length should not affect the backtest
score = NaiveDrift().backtest(
linear_series,
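The practical payoff the new test hints at is that one set of cached forecasts can be scored under several metrics with no retraining. A small self-contained sketch (the paired `actuals`/`forecasts` lists and the `mae`/`rmse` helpers are invented for illustration; darts' own metrics such as `r2_score` would play the same role):

```python
import math

# Hypothetical cached output of historical_forecasts() with last_points_only=False:
# one short forecast per evaluation point, paired with the matching actual values.
actuals = [[2.0, 3.0], [3.0, 4.0], [4.0, 5.0]]
forecasts = [[1.9, 3.2], [3.1, 3.8], [4.3, 5.1]]


def mae(a, f):
    return sum(abs(x - y) for x, y in zip(a, f)) / len(a)


def rmse(a, f):
    return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, f)) / len(a))


# With forecasts cached, each extra metric is one cheap scoring pass --
# which is what passing `historical_forecasts` to backtest() buys you.
for metric in (mae, rmse):
    scores = [metric(a, f) for a, f in zip(actuals, forecasts)]
    print(metric.__name__, sum(scores) / len(scores))
```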