From 6a06d87ae31b2636b03fd0861205dcca38dfd4fa Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 13 Nov 2022 12:45:15 +0100 Subject: [PATCH 1/3] fix regression ensemble models when training on single series and using another series at predict time --- darts/models/forecasting/ensemble_model.py | 6 +- .../forecasting/regression_ensemble_model.py | 11 ++-- .../test_regression_ensemble_model.py | 60 +++++++++++++++---- 3 files changed, 57 insertions(+), 20 deletions(-) diff --git a/darts/models/forecasting/ensemble_model.py b/darts/models/forecasting/ensemble_model.py index 1cebb6bfe3..d89bb8ccfa 100644 --- a/darts/models/forecasting/ensemble_model.py +++ b/darts/models/forecasting/ensemble_model.py @@ -165,11 +165,7 @@ def predict( future_covariates=future_covariates, num_samples=num_samples, ) - - if self.is_single_series: - return self.ensemble(predictions) - else: - return self.ensemble(predictions, series) + return self.ensemble(predictions, series=series) @abstractmethod def ensemble( diff --git a/darts/models/forecasting/regression_ensemble_model.py b/darts/models/forecasting/regression_ensemble_model.py index 0fe6b8a01f..7a937a4916 100644 --- a/darts/models/forecasting/regression_ensemble_model.py +++ b/darts/models/forecasting/regression_ensemble_model.py @@ -15,6 +15,7 @@ from darts.models.forecasting.linear_regression_model import LinearRegressionModel from darts.models.forecasting.regression_model import RegressionModel from darts.timeseries import TimeSeries +from darts.utils.utils import seq2series, series2seq logger = get_logger(__name__) @@ -156,9 +157,10 @@ def ensemble( predictions: Union[TimeSeries, Sequence[TimeSeries]], series: Optional[Sequence[TimeSeries]] = None, ) -> Union[TimeSeries, Sequence[TimeSeries]]: - if self.is_single_series: - predictions = [predictions] - series = [series] + + is_single_series = isinstance(series, TimeSeries) or series is None + predictions = series2seq(predictions) + series = series2seq(series) if series is not None else [None] ensembled = [ self.regression_model.predict( @@ -166,5 +168,4 @@ def ensemble( ) for serie, prediction in zip(series, predictions) ] - - return ensembled[0] if self.is_single_series else ensembled + return seq2series(ensembled) if is_single_series else ensembled diff --git a/darts/tests/models/forecasting/test_regression_ensemble_model.py b/darts/tests/models/forecasting/test_regression_ensemble_model.py index 511dc75d17..49837f3775 100644 --- a/darts/tests/models/forecasting/test_regression_ensemble_model.py +++ b/darts/tests/models/forecasting/test_regression_ensemble_model.py @@ -2,13 +2,21 @@ import numpy as np import pandas as pd +import pytest from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LinearRegression from darts import TimeSeries from darts.logging import get_logger from darts.metrics import rmse -from darts.models import NaiveDrift, NaiveSeasonal +from darts.models import ( + LinearRegressionModel, + NaiveDrift, + NaiveSeasonal, + RandomForest, + RegressionEnsembleModel, + RegressionModel, +) from darts.tests.base_test_class import DartsBaseTestClass from darts.tests.models.forecasting.test_ensemble_models import _make_ts from darts.tests.models.forecasting.test_regression_models import train_test_split @@ -19,14 +27,7 @@ try: import torch - from darts.models import ( - BlockRNNModel, - LinearRegressionModel, - RandomForest, - RegressionEnsembleModel, - RegressionModel, - RNNModel, - ) + from darts.models import BlockRNNModel, RNNModel TORCH_AVAILABLE = True except ImportError: @@ -85,6 +86,24 @@ def get_global_models(self, output_chunk_length=5): ), ] + def get_global_ensembe_model(self, output_chunk_length=5): + lags = [-1, -2, -5] + return RegressionEnsembleModel( + forecasting_models=[ + LinearRegressionModel( + lags=lags, + lags_past_covariates=lags, + output_chunk_length=output_chunk_length, + ), + LinearRegressionModel( + lags=lags, + lags_past_covariates=lags, + output_chunk_length=output_chunk_length, + ), + ], + regression_train_n_points=10, + ) + @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def test_accepts_different_regression_models(self): regr1 = LinearRegression() @@ -120,7 +139,7 @@ def test_train_n_points(self): regr = LinearRegressionModel(lags_future_covariates=[0]) # same values - ensemble = RegressionEnsembleModel(self.get_local_models(), 5, regr) + _ = RegressionEnsembleModel(self.get_local_models(), 5, regr) # too big value to perform the split ensemble = RegressionEnsembleModel(self.get_local_models(), 100) @@ -182,6 +201,27 @@ def test_train_predict_global_models_multivar_with_covariates(self): ensemble.fit(self.seq1, self.cov1) ensemble.predict(10, self.seq2, self.cov2) + def test_predict_with_target(self): + series_long = self.combined + series_short = series_long[:50] + + # train with a single series + ensemble_model = self.get_global_ensembe_model() + ensemble_model.fit(series_short, past_covariates=series_long) + # predict after end of train series + ensemble_model.predict(n=5, past_covariates=series_long) + # predict a new target series + ensemble_model.predict(n=5, series=series_long, past_covariates=series_long) + + # train with multiple series + ensemble_model = self.get_global_ensembe_model() + ensemble_model.fit([series_short] * 2, past_covariates=[series_long] * 2) + with pytest.raises(ValueError): + # predict without passing series should raise an error + ensemble_model.predict(n=5, past_covariates=series_long) + # predict a new target series + ensemble_model.predict(n=5, series=series_long, past_covariates=series_long) + @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def helper_test_models_accuracy( self, model_instance, n, series, past_covariates, min_rmse From 7a38bcf554556a3d5fe6c98137fd36f2cd5b31c2 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 13 Nov 2022 13:13:28 +0100 Subject: [PATCH 2/3] additional fixes for single vs multiple target series --- darts/models/forecasting/ensemble_model.py | 18 ++--- .../forecasting/regression_ensemble_model.py | 5 +- .../forecasting/test_ensemble_models.py | 67 +++++++++++++++++++ .../test_regression_ensemble_model.py | 41 ++++++++++-- 4 files changed, 113 insertions(+), 18 deletions(-) diff --git a/darts/models/forecasting/ensemble_model.py b/darts/models/forecasting/ensemble_model.py index d89bb8ccfa..53fe4f15e6 100644 --- a/darts/models/forecasting/ensemble_model.py +++ b/darts/models/forecasting/ensemble_model.py @@ -59,7 +59,6 @@ def __init__( super().__init__() self.models = models - self.is_single_series = None def fit( self, @@ -83,16 +82,16 @@ def fit( logger, ) - self.is_single_series = isinstance(series, TimeSeries) + is_single_series = isinstance(series, TimeSeries) # check that if timeseries is single series, than covariates are as well and vice versa error = False if past_covariates is not None: - error = self.is_single_series != isinstance(past_covariates, TimeSeries) + error = is_single_series != isinstance(past_covariates, TimeSeries) if future_covariates is not None: - error = self.is_single_series != isinstance(future_covariates, TimeSeries) + error = is_single_series != isinstance(future_covariates, TimeSeries) raise_if( error, @@ -125,6 +124,7 @@ def _make_multiple_predictions( future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, num_samples: int = 1, ): + is_single_series = isinstance(series, TimeSeries) or series is None predictions = [ model._predict_wrapper( n=n, @@ -135,11 +135,11 @@ def _make_multiple_predictions( ) for model in self.models ] - - if self.is_single_series: - return self._stack_ts_seq(predictions) - else: - return self._stack_ts_multiseq(predictions) + return ( + self._stack_ts_seq(predictions) + if is_single_series + else self._stack_ts_multiseq(predictions) + ) def predict( self, diff --git a/darts/models/forecasting/regression_ensemble_model.py b/darts/models/forecasting/regression_ensemble_model.py index 7a937a4916..7a46754f8d 100644 --- a/darts/models/forecasting/regression_ensemble_model.py +++ b/darts/models/forecasting/regression_ensemble_model.py @@ -91,7 +91,8 @@ def fit( ) # spare train_n_points points to serve as regression target - if self.is_single_series: + is_single_series = isinstance(series, TimeSeries) + if is_single_series: train_n_points_too_big = len(self.training_series) <= self.train_n_points else: train_n_points_too_big = any( @@ -105,7 +106,7 @@ def fit( logger, ) - if self.is_single_series: + if is_single_series: forecast_training = self.training_series[: -self.train_n_points] regression_target = self.training_series[-self.train_n_points :] else: diff --git a/darts/tests/models/forecasting/test_ensemble_models.py b/darts/tests/models/forecasting/test_ensemble_models.py index 96a17854fc..68e9edcfe2 100644 --- a/darts/tests/models/forecasting/test_ensemble_models.py +++ b/darts/tests/models/forecasting/test_ensemble_models.py @@ -7,6 +7,7 @@ from darts.logging import get_logger from darts.models import ( ExponentialSmoothing, + LinearRegressionModel, NaiveDrift, NaiveEnsembleModel, NaiveSeasonal, @@ -148,6 +149,72 @@ def test_fit_univar_ts_with_covariates_for_local_models(self): with self.assertRaises(ValueError): naive.fit(self.series1, self.series2) + def test_predict_with_target(self): + series_long = self.series1 + series_short = series_long[:25] + + # train with a single series + ensemble_model = self.get_global_ensembe_model() + ensemble_model.fit(series_short, past_covariates=series_long) + # predict after end of train series + preds = ensemble_model.predict(n=5, past_covariates=series_long) + self.assertTrue(isinstance(preds, TimeSeries)) + # predict a new target series + preds = ensemble_model.predict( + n=5, series=series_long, past_covariates=series_long + ) + self.assertTrue(isinstance(preds, TimeSeries)) + # predict multiple target series + preds = ensemble_model.predict( + n=5, series=[series_long] * 2, past_covariates=[series_long] * 2 + ) + self.assertTrue(isinstance(preds, list) and len(preds) == 2) + # predict single target series in list + preds = ensemble_model.predict( + n=5, series=[series_long], past_covariates=[series_long] + ) + self.assertTrue(isinstance(preds, list) and len(preds) == 1) + + # train with multiple series + ensemble_model = self.get_global_ensembe_model() + ensemble_model.fit([series_short] * 2, past_covariates=[series_long] * 2) + with self.assertRaises(ValueError): + # predict without passing series should raise an error + ensemble_model.predict(n=5, past_covariates=series_long) + # predict a new target series + preds = ensemble_model.predict( + n=5, series=series_long, past_covariates=series_long + ) + self.assertTrue(isinstance(preds, TimeSeries)) + # predict multiple target series + preds = ensemble_model.predict( + n=5, series=[series_long] * 2, past_covariates=[series_long] * 2 + ) + self.assertTrue(isinstance(preds, list) and len(preds) == 2) + # predict single target series in list + preds = ensemble_model.predict( + n=5, series=[series_long], past_covariates=[series_long] + ) + self.assertTrue(isinstance(preds, list) and len(preds) == 1) + + @staticmethod + def get_global_ensembe_model(output_chunk_length=5): + lags = [-1, -2, -5] + return NaiveEnsembleModel( + models=[ + LinearRegressionModel( + lags=lags, + lags_past_covariates=lags, + output_chunk_length=output_chunk_length, + ), + LinearRegressionModel( + lags=lags, + lags_past_covariates=lags, + output_chunk_length=output_chunk_length, + ), + ], + ) + if __name__ == "__main__": import unittest diff --git a/darts/tests/models/forecasting/test_regression_ensemble_model.py b/darts/tests/models/forecasting/test_regression_ensemble_model.py index 49837f3775..cc8fa5fe32 100644 --- a/darts/tests/models/forecasting/test_regression_ensemble_model.py +++ b/darts/tests/models/forecasting/test_regression_ensemble_model.py @@ -2,7 +2,6 @@ import numpy as np import pandas as pd -import pytest from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import LinearRegression @@ -86,7 +85,8 @@ def get_global_models(self, output_chunk_length=5): ), ] - def get_global_ensembe_model(self, output_chunk_length=5): + @staticmethod + def get_global_ensembe_model(output_chunk_length=5): lags = [-1, -2, -5] return RegressionEnsembleModel( forecasting_models=[ @@ -203,24 +203,51 @@ def test_train_predict_global_models_multivar_with_covariates(self): def test_predict_with_target(self): series_long = self.combined - series_short = series_long[:50] + series_short = series_long[:25] # train with a single series ensemble_model = self.get_global_ensembe_model() ensemble_model.fit(series_short, past_covariates=series_long) # predict after end of train series - ensemble_model.predict(n=5, past_covariates=series_long) + preds = ensemble_model.predict(n=5, past_covariates=series_long) + self.assertTrue(isinstance(preds, TimeSeries)) # predict a new target series - ensemble_model.predict(n=5, series=series_long, past_covariates=series_long) + preds = ensemble_model.predict( + n=5, series=series_long, past_covariates=series_long + ) + self.assertTrue(isinstance(preds, TimeSeries)) + # predict multiple target series + preds = ensemble_model.predict( + n=5, series=[series_long] * 2, past_covariates=[series_long] * 2 + ) + self.assertTrue(isinstance(preds, list) and len(preds) == 2) + # predict single target series in list + preds = ensemble_model.predict( + n=5, series=[series_long], past_covariates=[series_long] + ) + self.assertTrue(isinstance(preds, list) and len(preds) == 1) # train with multiple series ensemble_model = self.get_global_ensembe_model() ensemble_model.fit([series_short] * 2, past_covariates=[series_long] * 2) - with pytest.raises(ValueError): + with self.assertRaises(ValueError): # predict without passing series should raise an error ensemble_model.predict(n=5, past_covariates=series_long) # predict a new target series - ensemble_model.predict(n=5, series=series_long, past_covariates=series_long) + preds = ensemble_model.predict( + n=5, series=series_long, past_covariates=series_long + ) + self.assertTrue(isinstance(preds, TimeSeries)) + # predict multiple target series + preds = ensemble_model.predict( + n=5, series=[series_long] * 2, past_covariates=[series_long] * 2 + ) + self.assertTrue(isinstance(preds, list) and len(preds) == 2) + # predict single target series in list + preds = ensemble_model.predict( + n=5, series=[series_long], past_covariates=[series_long] + ) + self.assertTrue(isinstance(preds, list) and len(preds) == 1) @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def helper_test_models_accuracy( From d32e5d03035dea9e8748621192a20234457c893a Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 13 Nov 2022 13:24:17 +0100 Subject: [PATCH 3/3] removes some unnecessary test skips for old regression models torch dependency --- .../models/forecasting/test_regression_ensemble_model.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/darts/tests/models/forecasting/test_regression_ensemble_model.py b/darts/tests/models/forecasting/test_regression_ensemble_model.py index cc8fa5fe32..949141287b 100644 --- a/darts/tests/models/forecasting/test_regression_ensemble_model.py +++ b/darts/tests/models/forecasting/test_regression_ensemble_model.py @@ -104,7 +104,6 @@ def get_global_ensembe_model(output_chunk_length=5): regression_train_n_points=10, ) - @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def test_accepts_different_regression_models(self): regr1 = LinearRegression() regr2 = RandomForestRegressor() @@ -120,7 +119,6 @@ def test_accepts_different_regression_models(self): model.fit(series=self.combined) model.predict(10) - @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def test_accepts_one_model(self): regr1 = LinearRegression() regr2 = RandomForest(lags_future_covariates=[0]) @@ -134,7 +132,6 @@ def test_accepts_one_model(self): model.fit(series=self.combined) model.predict(10) - @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def test_train_n_points(self): regr = LinearRegressionModel(lags_future_covariates=[0]) @@ -249,7 +246,6 @@ def test_predict_with_target(self): ) self.assertTrue(isinstance(preds, list) and len(preds) == 1) - @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def helper_test_models_accuracy( self, model_instance, n, series, past_covariates, min_rmse ): @@ -268,7 +264,6 @@ def helper_test_models_accuracy( f"Model was not able to denoise data. A rmse score of {current_rmse} was recorded.", ) - @unittest.skipUnless(TORCH_AVAILABLE, "requires torch") def denoising_input(self): np.random.seed(self.RANDOM_SEED)