diff --git a/darts/models/__init__.py b/darts/models/__init__.py index cc982fe94b..60ac60b572 100644 --- a/darts/models/__init__.py +++ b/darts/models/__init__.py @@ -90,12 +90,15 @@ class NotImportedCatBoostModel: try: from darts.models.forecasting.croston import Croston from darts.models.forecasting.sf_auto_arima import StatsForecastAutoARIMA - from darts.models.forecasting.sf_ets import StatsForecastETS + from darts.models.forecasting.sf_auto_ces import StatsForecastAutoCES + from darts.models.forecasting.sf_auto_ets import StatsForecastAutoETS + from darts.models.forecasting.sf_auto_theta import StatsForecastAutoTheta + except ImportError: logger.warning( "The statsforecast module could not be imported. " "To enable support for the StatsForecastAutoARIMA, " - "StatsForecastETS and Croston models, please consider " + "StatsForecastAutoETS and Croston models, please consider " "installing it." ) @@ -104,10 +107,10 @@ class NotImportedStatsForecastAutoARIMA: StatsForecastAutoARIMA = NotImportedStatsForecastAutoARIMA() - class NotImportedStatsForecastETS: + class NotImportedStatsForecastAutoETS: usable = False - StatsForecastETS = NotImportedStatsForecastETS() + StatsForecastAutoETS = NotImportedStatsForecastAutoETS() class NotImportedCroston: usable = False diff --git a/darts/models/components/statsforecast_utils.py b/darts/models/components/statsforecast_utils.py new file mode 100644 index 0000000000..9659ec86a3 --- /dev/null +++ b/darts/models/components/statsforecast_utils.py @@ -0,0 +1,30 @@ +""" +StatsForecast utils +----------- +""" + +import numpy as np + +# In a normal distribution, 68.27 percentage of values lie within one standard deviation of the mean +one_sigma_rule = 68.27 + + +def create_normal_samples( + mu: float, + std: float, + num_samples: int, + n: int, +) -> np.array: + """Generate samples assuming a Normal distribution.""" + samples = np.random.normal(loc=mu, scale=std, size=(num_samples, n)).T + samples = np.expand_dims(samples, axis=1) + return samples + + +def unpack_sf_dict( + forecast_dict: dict, +): + """Unpack the dictionary that is returned by the StatsForecast 'predict()' method.""" + mu = forecast_dict["mean"] + std = forecast_dict[f"hi-{one_sigma_rule}"] - mu + return mu, std diff --git a/darts/models/forecasting/sf_auto_arima.py b/darts/models/forecasting/sf_auto_arima.py index 66e53876a2..a6a048926a 100644 --- a/darts/models/forecasting/sf_auto_arima.py +++ b/darts/models/forecasting/sf_auto_arima.py @@ -5,10 +5,14 @@ from typing import Optional -import numpy as np from statsforecast.models import AutoARIMA as SFAutoARIMA from darts import TimeSeries +from darts.models.components.statsforecast_utils import ( + create_normal_samples, + one_sigma_rule, + unpack_sf_dict, +) from darts.models.forecasting.forecasting_model import ( FutureCovariatesLocalForecastingModel, ) @@ -91,17 +95,15 @@ def _predict( verbose: bool = False, ): super()._predict(n, future_covariates, num_samples) - forecast_df = self.model.predict( + forecast_dict = self.model.predict( h=n, X=future_covariates.values(copy=False) if future_covariates else None, - level=(68.27,), # ask one std for the confidence interval. + level=(one_sigma_rule,), # ask one std for the confidence interval. ) - mu = forecast_df["mean"] + mu, std = unpack_sf_dict(forecast_dict) if num_samples > 1: - std = forecast_df["hi-68.27"] - mu - samples = np.random.normal(loc=mu, scale=std, size=(num_samples, n)).T - samples = np.expand_dims(samples, axis=1) + samples = create_normal_samples(mu, std, num_samples, n) else: samples = mu diff --git a/darts/models/forecasting/sf_auto_ces.py b/darts/models/forecasting/sf_auto_ces.py new file mode 100644 index 0000000000..6d18713595 --- /dev/null +++ b/darts/models/forecasting/sf_auto_ces.py @@ -0,0 +1,80 @@ +""" +StatsForecastAutoCES +----------- +""" + +from statsforecast.models import AutoCES as SFAutoCES + +from darts import TimeSeries +from darts.models.forecasting.forecasting_model import LocalForecastingModel + + +class StatsForecastAutoCES(LocalForecastingModel): + def __init__(self, *autoces_args, **autoces_kwargs): + """Auto-CES based on `Statsforecasts package + `_. + + Automatically selects the best Complex Exponential Smoothing model using an information criterion. + + + We refer to the `statsforecast AutoCES documentation + `_ + for the documentation of the arguments. + + Parameters + ---------- + autoces_args + Positional arguments for ``statsforecasts.models.AutoCES``. + autoces_kwargs + Keyword arguments for ``statsforecasts.models.AutoCES``. + + .. + + Examples + -------- + >>> from darts.models import StatsForecastAutoCES + >>> from darts.datasets import AirPassengersDataset + >>> series = AirPassengersDataset().load() + >>> model = StatsForecastAutoCES(season_length=12) + >>> model.fit(series[:-36]) + >>> pred = model.predict(36, num_samples=100) + """ + super().__init__() + self.model = SFAutoCES(*autoces_args, **autoces_kwargs) + + def __str__(self): + return "Auto-CES-Statsforecasts" + + def fit(self, series: TimeSeries): + super().fit(series) + self._assert_univariate(series) + series = self.training_series + self.model.fit( + series.values(copy=False).flatten(), + ) + return self + + def predict( + self, + n: int, + num_samples: int = 1, + verbose: bool = False, + ): + super().predict(n, num_samples) + forecast_dict = self.model.predict( + h=n, + ) + + mu = forecast_dict["mean"] + + return self._build_forecast_series(mu) + + @property + def min_train_series_length(self) -> int: + return 10 + + def _supports_range_index(self) -> bool: + return True + + def _is_probabilistic(self) -> bool: + return False diff --git a/darts/models/forecasting/sf_ets.py b/darts/models/forecasting/sf_auto_ets.py similarity index 60% rename from darts/models/forecasting/sf_ets.py rename to darts/models/forecasting/sf_auto_ets.py index 2f5f622d90..db12fd320d 100644 --- a/darts/models/forecasting/sf_ets.py +++ b/darts/models/forecasting/sf_auto_ets.py @@ -1,19 +1,25 @@ """ -StatsForecastETS +StatsForecastAutoETS ----------- """ from typing import Optional -from statsforecast.models import ETS +from statsforecast.models import AutoETS as SFAutoETS from darts import TimeSeries +from darts.models import LinearRegressionModel +from darts.models.components.statsforecast_utils import ( + create_normal_samples, + one_sigma_rule, + unpack_sf_dict, +) from darts.models.forecasting.forecasting_model import ( FutureCovariatesLocalForecastingModel, ) -class StatsForecastETS(FutureCovariatesLocalForecastingModel): +class StatsForecastAutoETS(FutureCovariatesLocalForecastingModel): def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs): """ETS based on `Statsforecasts package `_. @@ -25,6 +31,12 @@ def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs) This model accepts the same arguments as the `statsforecast ETS `_. package. + In addition to the StatsForecast implementation, this model can handle future covariates. It does so by first + regressing the series against the future covariates using the :class:'LinearRegressionModel' model and then + running StatsForecast's AutoETS on the in-sample residuals from this original regression. This approach was + inspired by 'this post of Stephan Kolassa< https://stats.stackexchange.com/q/220885>'_. + + Parameters ---------- season_length @@ -64,14 +76,15 @@ def __init__(self, *ets_args, add_encoders: Optional[dict] = None, **ets_kwargs) Examples -------- >>> from darts.datasets import AirPassengersDataset - >>> from darts.models import StatsForecastETS + >>> from darts.models import StatsForecastAutoETS >>> series = AirPassengersDataset().load() - >>> model = StatsForecastETS(season_length=12, model="AZZ") + >>> model = StatsForecastAutoETS(season_length=12, model="AZZ") >>> model.fit(series[:-36]) >>> pred = model.predict(36) """ super().__init__(add_encoders=add_encoders) - self.model = ETS(*ets_args, **ets_kwargs) + self.model = SFAutoETS(*ets_args, **ets_kwargs) + self._linreg = None def __str__(self): return "ETS-Statsforecasts" @@ -80,9 +93,25 @@ def _fit(self, series: TimeSeries, future_covariates: Optional[TimeSeries] = Non super()._fit(series, future_covariates) self._assert_univariate(series) series = self.training_series + + if future_covariates is not None: + # perform OLS and get in-sample residuals + linreg = LinearRegressionModel(lags_future_covariates=[0]) + linreg.fit(series, future_covariates=future_covariates) + fitted_values = linreg.model.predict( + X=future_covariates.slice_intersect(series).values(copy=False) + ) + fitted_values_ts = TimeSeries.from_times_and_values( + times=series.time_index, values=fitted_values + ) + resids = series - fitted_values_ts + self._linreg = linreg + target = resids + else: + target = series + self.model.fit( - series.values(copy=False).flatten(), - X=future_covariates.values(copy=False) if future_covariates else None, + target.values(copy=False).flatten(), ) return self @@ -94,12 +123,27 @@ def _predict( verbose: bool = False, ): super()._predict(n, future_covariates, num_samples) - forecast_df = self.model.predict( + forecast_dict = self.model.predict( h=n, - X=future_covariates.values(copy=False) if future_covariates else None, + level=(one_sigma_rule,), # ask one std for the confidence interval ) - return self._build_forecast_series(forecast_df["mean"]) + mu_ets, std = unpack_sf_dict(forecast_dict) + + if future_covariates is not None: + mu_linreg = self._linreg.predict(n, future_covariates=future_covariates) + mu_linreg_values = mu_linreg.values(copy=False).reshape( + n, + ) + mu = mu_ets + mu_linreg_values + else: + mu = mu_ets + + if num_samples > 1: + samples = create_normal_samples(mu, std, num_samples, n) + else: + samples = mu + return self._build_forecast_series(samples) @property def min_train_series_length(self) -> int: @@ -109,4 +153,4 @@ def _supports_range_index(self) -> bool: return True def _is_probabilistic(self) -> bool: - return False + return True diff --git a/darts/models/forecasting/sf_auto_theta.py b/darts/models/forecasting/sf_auto_theta.py new file mode 100644 index 0000000000..3280a16722 --- /dev/null +++ b/darts/models/forecasting/sf_auto_theta.py @@ -0,0 +1,93 @@ +""" +StatsForecastAutoTheta +----------- +""" + +from statsforecast.models import AutoTheta as SFAutoTheta + +from darts import TimeSeries +from darts.models.components.statsforecast_utils import ( + create_normal_samples, + one_sigma_rule, + unpack_sf_dict, +) +from darts.models.forecasting.forecasting_model import LocalForecastingModel + + +class StatsForecastAutoTheta(LocalForecastingModel): + def __init__(self, *autotheta_args, **autotheta_kwargs): + """Auto-Theta based on `Statsforecasts package + `_. + + Automatically selects the best Theta (Standard Theta Model (‘STM’), Optimized Theta Model (‘OTM’), + Dynamic Standard Theta Model (‘DSTM’), Dynamic Optimized Theta Model (‘DOTM’)) model using mse. + + + It is probabilistic, whereas :class:`FourTheta` is not. + + We refer to the `statsforecast AutoTheta documentation + `_ + for the documentation of the arguments. + + Parameters + ---------- + autotheta_args + Positional arguments for ``statsforecasts.models.AutoTheta``. + autotheta_kwargs + Keyword arguments for ``statsforecasts.models.AutoTheta``. + + .. + + Examples + -------- + >>> from darts.models import StatsForecastAutoTheta + >>> from darts.datasets import AirPassengersDataset + >>> series = AirPassengersDataset().load() + >>> model = StatsForecastAutoTheta(season_length=12) + >>> model.fit(series[:-36]) + >>> pred = model.predict(36, num_samples=100) + """ + super().__init__() + self.model = SFAutoTheta(*autotheta_args, **autotheta_kwargs) + + def __str__(self): + return "Auto-Theta-Statsforecasts" + + def fit(self, series: TimeSeries): + super().fit(series) + self._assert_univariate(series) + series = self.training_series + self.model.fit( + series.values(copy=False).flatten(), + ) + return self + + def predict( + self, + n: int, + num_samples: int = 1, + verbose: bool = False, + ): + super().predict(n, num_samples) + forecast_dict = self.model.predict( + h=n, + level=(one_sigma_rule,), # ask one std for the confidence interval. + ) + + mu, std = unpack_sf_dict(forecast_dict) + if num_samples > 1: + samples = create_normal_samples(mu, std, num_samples, n) + else: + samples = mu + + return self._build_forecast_series(samples) + + @property + def min_train_series_length(self) -> int: + return 10 + + def _supports_range_index(self) -> bool: + return True + + def _is_probabilistic(self) -> bool: + return True diff --git a/darts/tests/models/forecasting/test_local_forecasting_models.py b/darts/tests/models/forecasting/test_local_forecasting_models.py index 63b5c9d82a..04fc2ce771 100644 --- a/darts/tests/models/forecasting/test_local_forecasting_models.py +++ b/darts/tests/models/forecasting/test_local_forecasting_models.py @@ -31,7 +31,9 @@ RandomForest, RegressionModel, StatsForecastAutoARIMA, - StatsForecastETS, + StatsForecastAutoCES, + StatsForecastAutoETS, + StatsForecastAutoTheta, Theta, ) from darts.models.forecasting.forecasting_model import ( @@ -51,7 +53,9 @@ (ARIMA(12, 2, 1), 5.2), (ARIMA(1, 1, 1), 24), (StatsForecastAutoARIMA(season_length=12), 4.6), - (StatsForecastETS(season_length=12, model="AAZ"), 4.1), + (StatsForecastAutoTheta(season_length=12), 5.5), + (StatsForecastAutoCES(season_length=12, model="Z"), 7.3), + (StatsForecastAutoETS(season_length=12, model="AAZ"), 4.1), (Croston(version="classic"), 23), (Croston(version="tsb", alpha_d=0.1, alpha_p=0.1), 23), (Theta(), 11), @@ -85,7 +89,7 @@ dual_models = [ ARIMA(), StatsForecastAutoARIMA(season_length=12), - StatsForecastETS(season_length=12), + StatsForecastAutoETS(season_length=12), Prophet(), AutoARIMA(), ] diff --git a/darts/tests/models/forecasting/test_sf_auto_ets.py b/darts/tests/models/forecasting/test_sf_auto_ets.py new file mode 100644 index 0000000000..00927b9977 --- /dev/null +++ b/darts/tests/models/forecasting/test_sf_auto_ets.py @@ -0,0 +1,63 @@ +import numpy as np +import pandas as pd + +from darts import TimeSeries +from darts.datasets import AirPassengersDataset +from darts.metrics import mae +from darts.models import LinearRegressionModel, StatsForecastAutoETS +from darts.tests.base_test_class import DartsBaseTestClass + + +class StatsForecastAutoETSTestCase(DartsBaseTestClass): + # real timeseries for functionality tests + ts_passengers = AirPassengersDataset().load() + ts_pass_train, ts_pass_val = ts_passengers.split_after(pd.Timestamp("19570101")) + + # as future covariates we want a trend + trend_values = np.arange(start=1, stop=len(ts_passengers) + 1) + trend_times = ts_passengers.time_index + ts_trend = TimeSeries.from_times_and_values( + times=trend_times, values=trend_values, columns=["trend"] + ) + ts_trend_train, ts_trend_val = ts_trend.split_after(pd.Timestamp("19570101")) + + def test_fit_on_residuals(self): + model = StatsForecastAutoETS(season_length=12, model="ZZZ") + + # test if we are indeed fitting the AutoETS on the residuals of the linear regression + model.fit(series=self.ts_pass_train, future_covariates=self.ts_trend_train) + + # create the residuals from the linear regression + fitted_values_linreg = model._linreg.model.predict( + X=self.ts_trend_train.values(copy=False) + ) + fitted_values_linreg_ts = TimeSeries.from_times_and_values( + times=self.ts_pass_train.time_index, values=fitted_values_linreg + ) + resids = self.ts_pass_train - fitted_values_linreg_ts + + # now make in-sample predictions with the AutoETS model + in_sample_preds = model.model.predict_in_sample()["fitted"] + ts_in_sample_preds = TimeSeries.from_times_and_values( + times=self.ts_pass_train.time_index, values=in_sample_preds + ) + + # compare in-sample predictions to the residuals they have supposedly been fitted on + current_mae = mae(resids, ts_in_sample_preds) + + self.assertTrue(current_mae < 9) + + def test_fit_a_linreg(self): + model = StatsForecastAutoETS(season_length=12, model="ZZZ") + model.fit(series=self.ts_pass_train, future_covariates=self.ts_trend_train) + + # check if linear regression was fit + self.assertIsNotNone(model._linreg) + self.assertTrue(model._linreg._fit_called) + + # fit a linear regression + linreg = LinearRegressionModel(lags_future_covariates=[0]) + linreg.fit(series=self.ts_pass_train, future_covariates=self.ts_trend_train) + + # check if the linear regression was fit on the same data by checking if the coefficients are equal + self.assertEqual(model._linreg.model.coef_, linreg.model.coef_) diff --git a/requirements/core.txt b/requirements/core.txt index ccc0ebe5f4..a1b6758911 100644 --- a/requirements/core.txt +++ b/requirements/core.txt @@ -13,7 +13,7 @@ requests>=2.22.0 scikit-learn>=1.0.1 scipy>=1.3.2 shap>=0.40.0 -statsforecast>=1.0.0 +statsforecast>=1.4.0 statsmodels>=0.13.0 tbats>=1.1.0 tqdm>=4.60.0