From 6c27d13fd48dfa95eef0516d0bee9fb5b1583676 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Tue, 19 Jul 2022 16:43:24 +0300 Subject: [PATCH 1/9] Add basic implementation --- etna/models/tbats.py | 27 +++++++++++++++++++++++++-- tests/test_models/test_inference.py | 16 ++++++++++++++-- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index 9f51f6223..09c3de16f 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -17,27 +17,50 @@ class _TBATSAdapter(BaseAdapter): def __init__(self, model: Estimator): self.model = model self._fitted_model: Optional[Model] = None + self._last_train_timestamp = None + self._freq = None def fit(self, df: pd.DataFrame, regressors: Iterable[str]): target = df["target"] self._fitted_model = self.model.fit(target) + self._last_train_timestamp = df["timestamp"].max() + self._freq = pd.infer_freq(df["timestamp"]) return self + def _determine_num_steps_to_forecast(self, last_test_timestamp: pd.Timestamp) -> int: + diff = last_test_timestamp - self._last_train_timestamp + unit_diff = pd.timedelta_range(start=0, periods=2, freq=self._freq)[1] + num_steps = diff / unit_diff + return int(num_steps) + def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterable[float]) -> pd.DataFrame: if self._fitted_model is None: raise ValueError("Model is not fitted! 
Fit the model before calling predict method!") + + if df["timestamp"].min() <= self._last_train_timestamp: + raise NotImplementedError("It is not possible to make in-sample predictions with BATS/TBATS model!") + + steps_to_forecast = self._determine_num_steps_to_forecast(df["timestamp"].max()) + steps_to_skip = steps_to_forecast - df.shape[0] + y_pred = pd.DataFrame() if prediction_interval: for quantile in quantiles: - pred, confidence_intervals = self._fitted_model.forecast(steps=df.shape[0], confidence_level=quantile) + pred, confidence_intervals = self._fitted_model.forecast( + steps=steps_to_forecast, confidence_level=quantile + ) y_pred["target"] = pred if quantile < 1 / 2: y_pred[f"target_{quantile:.4g}"] = confidence_intervals["lower_bound"] else: y_pred[f"target_{quantile:.4g}"] = confidence_intervals["upper_bound"] else: - pred = self._fitted_model.forecast(steps=df.shape[0]) + pred = self._fitted_model.forecast(steps=steps_to_forecast) y_pred["target"] = pred + + # skip non-relevant timestamps + y_pred = y_pred.iloc[steps_to_skip:].reset_index(drop=True) + return y_pred def get_model(self) -> Estimator: diff --git a/tests/test_models/test_inference.py b/tests/test_models/test_inference.py index 9d9285ea3..7baf2aca5 100644 --- a/tests/test_models/test_inference.py +++ b/tests/test_models/test_inference.py @@ -167,6 +167,18 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds): _test_forecast_in_sample_full(example_tsds, model, transforms) +@pytest.mark.parametrize( + "model, transforms", + [ + (BATSModel(use_trend=True), []), + (TBATSModel(use_trend=True), []), + ], +) +def test_forecast_in_sample_not_implemented(model, transforms, example_tsds): + with pytest.raises(NotImplementedError, match="It is not possible to make in-sample predictions"): + _test_forecast_in_sample_full(example_tsds, model, transforms) + + @pytest.mark.parametrize( "model, transforms", [ @@ -305,6 +317,8 @@ def test_forecast_out_sample_prefix_failed(model, 
transforms, example_tsds): (HoltModel(), []), (HoltWintersModel(), []), (SimpleExpSmoothingModel(), []), + (BATSModel(use_trend=True), []), + (TBATSModel(use_trend=True), []), ( TFTModel(max_epochs=1, learning_rate=[0.01]), [ @@ -333,8 +347,6 @@ def test_forecast_out_sample_suffix(model, transforms, example_tsds): (MovingAverageModel(window=3), []), (SeasonalMovingAverageModel(), []), (NaiveModel(lag=3), []), - (BATSModel(use_trend=True), []), - (TBATSModel(use_trend=True), []), ( DeepARModel(max_epochs=5, learning_rate=[0.01]), [ From 1511b849a161260f0cfb33abe2c87d6c18c7c843 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Tue, 19 Jul 2022 16:46:37 +0300 Subject: [PATCH 2/9] Remove warning about frequency --- etna/models/tbats.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index 09c3de16f..290f204a5 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -24,7 +24,7 @@ def fit(self, df: pd.DataFrame, regressors: Iterable[str]): target = df["target"] self._fitted_model = self.model.fit(target) self._last_train_timestamp = df["timestamp"].max() - self._freq = pd.infer_freq(df["timestamp"]) + self._freq = pd.infer_freq(df["timestamp"], warn=False) return self def _determine_num_steps_to_forecast(self, last_test_timestamp: pd.Timestamp) -> int: From 9ff2d78ca7688937cc8e253ceb64d92bc957ae43 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Tue, 19 Jul 2022 16:57:35 +0300 Subject: [PATCH 3/9] Fix positions of some cases --- tests/test_models/test_inference.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tests/test_models/test_inference.py b/tests/test_models/test_inference.py index 7baf2aca5..ddd136b11 100644 --- a/tests/test_models/test_inference.py +++ b/tests/test_models/test_inference.py @@ -118,8 +118,6 @@ def _test_forecast_out_sample_suffix(ts, model, transforms): (MovingAverageModel(window=3), []), (NaiveModel(lag=3), []), 
(SeasonalMovingAverageModel(), []), - (BATSModel(use_trend=True), []), - (TBATSModel(use_trend=True), []), ], ) def test_forecast_in_sample_full(model, transforms, example_tsds): @@ -174,7 +172,7 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds): (TBATSModel(use_trend=True), []), ], ) -def test_forecast_in_sample_not_implemented(model, transforms, example_tsds): +def test_forecast_in_sample_full_not_implemented(model, transforms, example_tsds): with pytest.raises(NotImplementedError, match="It is not possible to make in-sample predictions"): _test_forecast_in_sample_full(example_tsds, model, transforms) @@ -197,8 +195,6 @@ def test_forecast_in_sample_not_implemented(model, transforms, example_tsds): (MovingAverageModel(window=3), []), (NaiveModel(lag=3), []), (SeasonalMovingAverageModel(), []), - (BATSModel(use_trend=True), []), - (TBATSModel(use_trend=True), []), ], ) def test_forecast_in_sample_suffix(model, transforms, example_tsds): @@ -241,6 +237,18 @@ def test_forecast_in_sample_suffix_failed(model, transforms, example_tsds): _test_forecast_in_sample_suffix(example_tsds, model, transforms) +@pytest.mark.parametrize( + "model, transforms", + [ + (BATSModel(use_trend=True), []), + (TBATSModel(use_trend=True), []), + ], +) +def test_forecast_in_sample_suffix_not_implemented(model, transforms, example_tsds): + with pytest.raises(NotImplementedError, match="It is not possible to make in-sample predictions"): + _test_forecast_in_sample_suffix(example_tsds, model, transforms) + + @pytest.mark.parametrize( "model, transforms", [ From bde7ca68393a0a3c4ad35a93f8df08f583f6a7d3 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Tue, 19 Jul 2022 17:01:43 +0300 Subject: [PATCH 4/9] Update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d2afebf5..46763bd81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,7 +44,7 @@ and this project adheres to [Semantic 
Versioning](https://semver.org/spec/v2.0.0 - - - -- +- Teach BATS/TBATS to work with in-sample, out-sample predictions correctly ([#806](https://github.com/tinkoff-ai/etna/pull/806)) - - Github actions cache issue with poetry update ([#778](https://github.com/tinkoff-ai/etna/pull/778)) - From d0e88076c2f268d5826ec03cd6f97625d3d55b3a Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Tue, 19 Jul 2022 17:30:38 +0300 Subject: [PATCH 5/9] Fix test_prediction_interval --- tests/test_models/test_tbats.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py index b0316b91c..5ccdb4409 100644 --- a/tests/test_models/test_tbats.py +++ b/tests/test_models/test_tbats.py @@ -110,7 +110,8 @@ def test_dummy(model, sinusoid_ts): @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()]) def test_prediction_interval(model, example_tsds): model.fit(example_tsds) - forecast = model.forecast(example_tsds, prediction_interval=True, quantiles=[0.025, 0.975]) + future_ts = example_tsds.make_future(3) + forecast = model.forecast(future_ts, prediction_interval=True, quantiles=[0.025, 0.975]) for segment in forecast.segments: segment_slice = forecast[:, segment, :][segment] assert {"target_0.025", "target_0.975", "target"}.issubset(segment_slice.columns) From f8582b8a6cf8a43b78832352b1b3f44e0e2d38fd Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Thu, 21 Jul 2022 11:24:44 +0300 Subject: [PATCH 6/9] Update error message --- etna/models/tbats.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/etna/models/tbats.py b/etna/models/tbats.py index 290f204a5..0b51f561c 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -38,7 +38,10 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterab raise ValueError("Model is not fitted! 
Fit the model before calling predict method!") if df["timestamp"].min() <= self._last_train_timestamp: - raise NotImplementedError("It is not possible to make in-sample predictions with BATS/TBATS model!") + raise NotImplementedError( + "It is not possible to make in-sample predictions with BATS/TBATS model! " + "In-sample predictions aren't supported by current implementation." + ) steps_to_forecast = self._determine_num_steps_to_forecast(df["timestamp"].max()) steps_to_skip = steps_to_forecast - df.shape[0] From 7bbe55416ed76cc2a0ee850021540c716c18b595 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Thu, 21 Jul 2022 12:10:11 +0300 Subject: [PATCH 7/9] Add test in mixed in-sample out-sample prediction --- etna/models/sarimax.py | 2 +- tests/test_models/test_inference.py | 106 +++++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/etna/models/sarimax.py b/etna/models/sarimax.py index a8c3e6324..a26263317 100644 --- a/etna/models/sarimax.py +++ b/etna/models/sarimax.py @@ -274,7 +274,7 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen y_pred[f"mean_{quantile:.4g}"] = series else: forecast = self._result.get_prediction( - start=df["timestamp"].min(), end=df["timestamp"].max(), dynamic=True, exog=exog_future + start=df["timestamp"].min(), end=df["timestamp"].max(), dynamic=False, exog=exog_future ) y_pred = forecast.predicted_mean y_pred.name = "mean" diff --git a/tests/test_models/test_inference.py b/tests/test_models/test_inference.py index ddd136b11..0aa9e89ab 100644 --- a/tests/test_models/test_inference.py +++ b/tests/test_models/test_inference.py @@ -57,7 +57,7 @@ def _test_forecast_in_sample_suffix(ts, model, transforms): forecast_ts = TSDataset(df, freq="D") forecast_ts.transform(ts.transforms) forecast_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN - forecast_ts.df = forecast_ts.df.iloc[5:] + forecast_ts.df = forecast_ts.df.iloc[6:] model.forecast(forecast_ts) # checking @@ -105,6 
+105,41 @@ def _test_forecast_out_sample_suffix(ts, model, transforms): assert_frame_equal(forecast_gap_df, forecast_full_df.iloc[2:]) +def _test_forecast_mixed_in_out_sample(ts, model, transforms): + # fitting + df = ts.to_pandas() + ts.fit_transform(transforms) + model.fit(ts) + + # forecasting mixed in-sample and out-sample + future_ts = ts.make_future(5) + future_df = future_ts.to_pandas().loc[:, pd.IndexSlice[:, "target"]] + df_full = pd.concat((df, future_df)) + forecast_full_ts = TSDataset(df=df_full, freq=future_ts.freq) + forecast_full_ts.transform(ts.transforms) + forecast_full_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN + forecast_full_ts.df = forecast_full_ts.df.iloc[6:] + model.forecast(forecast_full_ts) + + # forecasting only in sample + forecast_in_sample_ts = TSDataset(df, freq="D") + forecast_in_sample_ts.transform(ts.transforms) + forecast_in_sample_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN + forecast_in_sample_ts.df = forecast_in_sample_ts.df.iloc[6:] + model.forecast(forecast_in_sample_ts) + + # forecasting only out sample + forecast_out_sample_ts = ts.make_future(5) + model.forecast(forecast_out_sample_ts) + + # checking + forecast_full_df = forecast_full_ts.to_pandas() + forecast_in_sample_df = forecast_in_sample_ts.to_pandas() + forecast_out_sample_df = forecast_out_sample_ts.to_pandas() + assert_frame_equal(forecast_in_sample_df, forecast_full_df.iloc[:-5]) + assert_frame_equal(forecast_out_sample_df, forecast_full_df.iloc[-5:]) + + @pytest.mark.parametrize( "model, transforms", [ @@ -371,3 +406,72 @@ def test_forecast_out_sample_suffix(model, transforms, example_tsds): ) def test_forecast_out_sample_suffix_failed(model, transforms, example_tsds): _test_forecast_out_sample_suffix(example_tsds, model, transforms) + + +@pytest.mark.parametrize( + "model, transforms", + [ + (CatBoostModelPerSegment(), [LagTransform(in_column="target", lags=[5, 6])]), + (CatBoostModelMultiSegment(), [LagTransform(in_column="target", lags=[5, 6])]), 
+ (LinearPerSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]), + (LinearMultiSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]), + (ElasticPerSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]), + (ElasticMultiSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]), + (ProphetModel(), []), + (SARIMAXModel(), []), + (HoltModel(), []), + (HoltWintersModel(), []), + (SimpleExpSmoothingModel(), []), + ], +) +def test_forecast_mixed_in_out_sample(model, transforms, example_tsds): + _test_forecast_mixed_in_out_sample(example_tsds, model, transforms) + + +@pytest.mark.xfail(strict=True) +@pytest.mark.parametrize( + "model, transforms", + [ + (AutoARIMAModel(), []), + ( + DeepARModel(max_epochs=5, learning_rate=[0.01]), + [ + PytorchForecastingTransform( + max_encoder_length=5, + max_prediction_length=5, + time_varying_known_reals=["time_idx"], + time_varying_unknown_reals=["target"], + target_normalizer=GroupNormalizer(groups=["segment"]), + ) + ], + ), + ( + TFTModel(max_epochs=1, learning_rate=[0.01]), + [ + PytorchForecastingTransform( + max_encoder_length=21, + min_encoder_length=21, + max_prediction_length=5, + time_varying_known_reals=["time_idx"], + time_varying_unknown_reals=["target"], + static_categoricals=["segment"], + target_normalizer=None, + ) + ], + ), + ], +) +def test_forecast_mixed_in_out_sample_failed(model, transforms, example_tsds): + _test_forecast_mixed_in_out_sample(example_tsds, model, transforms) + + +@pytest.mark.parametrize( + "model, transforms", + [ + (BATSModel(use_trend=True), []), + (TBATSModel(use_trend=True), []), + ], +) +def test_forecast_mixed_in_out_sample_not_implemented(model, transforms, example_tsds): + with pytest.raises(NotImplementedError, match="It is not possible to make in-sample predictions"): + _test_forecast_mixed_in_out_sample(example_tsds, model, transforms) From 7e31a996c7dc77fb098ab8f846c15dd4ddc2f6fe Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Thu, 21 Jul 2022 
14:41:04 +0300 Subject: [PATCH 8/9] Add separate implementation of determine_num_steps_to_forecast and tests for it --- etna/models/tbats.py | 20 ++++++----- etna/models/utils.py | 51 ++++++++++++++++++++++++++ tests/test_models/test_utils.py | 64 +++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 9 deletions(-) create mode 100644 etna/models/utils.py create mode 100644 tests/test_models/test_utils.py diff --git a/etna/models/tbats.py b/etna/models/tbats.py index 0b51f561c..42631245a 100644 --- a/etna/models/tbats.py +++ b/etna/models/tbats.py @@ -11,6 +11,7 @@ from etna.models.base import BaseAdapter from etna.models.base import PerSegmentPredictionIntervalModel +from etna.models.utils import determine_num_steps_to_forecast class _TBATSAdapter(BaseAdapter): @@ -21,20 +22,19 @@ def __init__(self, model: Estimator): self._freq = None def fit(self, df: pd.DataFrame, regressors: Iterable[str]): + freq = pd.infer_freq(df["timestamp"], warn=False) + if freq is None: + raise ValueError("Can't determine frequency of a given dataframe") + target = df["target"] self._fitted_model = self.model.fit(target) self._last_train_timestamp = df["timestamp"].max() - self._freq = pd.infer_freq(df["timestamp"], warn=False) - return self + self._freq = freq - def _determine_num_steps_to_forecast(self, last_test_timestamp: pd.Timestamp) -> int: - diff = last_test_timestamp - self._last_train_timestamp - unit_diff = pd.timedelta_range(start=0, periods=2, freq=self._freq)[1] - num_steps = diff / unit_diff - return int(num_steps) + return self def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterable[float]) -> pd.DataFrame: - if self._fitted_model is None: + if self._fitted_model is None or self._freq is None: raise ValueError("Model is not fitted! 
Fit the model before calling predict method!") if df["timestamp"].min() <= self._last_train_timestamp: @@ -43,7 +43,9 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterab "In-sample predictions aren't supported by current implementation." ) - steps_to_forecast = self._determine_num_steps_to_forecast(df["timestamp"].max()) + steps_to_forecast = determine_num_steps_to_forecast( + last_train_timestamp=self._last_train_timestamp, last_test_timestamp=df["timestamp"].max(), freq=self._freq + ) steps_to_skip = steps_to_forecast - df.shape[0] y_pred = pd.DataFrame() diff --git a/etna/models/utils.py b/etna/models/utils.py new file mode 100644 index 000000000..49cb5bb93 --- /dev/null +++ b/etna/models/utils.py @@ -0,0 +1,51 @@ +import pandas as pd + + +def determine_num_steps_to_forecast( + last_train_timestamp: pd.Timestamp, last_test_timestamp: pd.Timestamp, freq: str +) -> int: + """Determine number of steps to make a forecast in future. + + It is useful for out-sample forecast with gap if model predicts only on a certain number of steps + in autoregressive manner. 
+ + Parameters + ---------- + last_train_timestamp: + last timestamp in train data + last_test_timestamp: + last timestamp in test data, should be after ``last_train_timestamp`` + freq: + pandas frequency string: `Offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_ + + Returns + ------- + : + number of steps + + Raises + ------ + ValueError: + Value of last test timestamp is less than or equal to last train timestamp + ValueError: + Last train timestamp isn't correct according to a given frequency + ValueError: + Last test timestamp isn't reachable with a given frequency + """ + if last_test_timestamp <= last_train_timestamp: + raise ValueError("Last train timestamp should be less than last test timestamp!") + + # check if last_train_timestamp is normalized + normalized_last_train_timestamp = pd.date_range(start=last_train_timestamp, periods=1, freq=freq) + if normalized_last_train_timestamp != last_train_timestamp: + raise ValueError(f"Last train timestamp isn't correct according to given frequency: {freq}") + + # make linear probing, because for complex offsets there is a cycle in `pd.date_range` + cur_value = 1 + while True: + timestamps = pd.date_range(start=last_train_timestamp, periods=cur_value + 1, freq=freq) + if timestamps[-1] == last_test_timestamp: + return cur_value + elif timestamps[-1] > last_test_timestamp: + raise ValueError(f"Last test timestamps isn't reachable with freq: {freq}") + cur_value += 1 diff --git a/tests/test_models/test_utils.py b/tests/test_models/test_utils.py new file mode 100644 index 000000000..d269a5b99 --- /dev/null +++ b/tests/test_models/test_utils.py @@ -0,0 +1,64 @@ +import pandas as pd +import pytest + +from etna.models.utils import determine_num_steps_to_forecast + + +@pytest.mark.parametrize( + "last_train_timestamp, last_test_timestamp, freq, answer", + [ + (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02"), "D", 1), + (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-11"), "D", 10), + (pd.Timestamp("2020-01-05"), pd.Timestamp("2020-01-19"), 
"W-SUN", 2), + (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-15"), pd.offsets.Week(), 2), + (pd.Timestamp("2020-01-31"), pd.Timestamp("2021-02-28"), "M", 13), + (pd.Timestamp("2020-01-01"), pd.Timestamp("2021-06-01"), "MS", 17), + ], +) +def test_determine_num_steps_to_forecast_ok(last_train_timestamp, last_test_timestamp, freq, answer): + result = determine_num_steps_to_forecast( + last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq + ) + assert result == answer + + +@pytest.mark.parametrize( + "last_train_timestamp, last_test_timestamp, freq", + [ + (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-01"), "D"), + (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-01-01"), "D"), + ], +) +def test_determine_num_steps_to_forecast_fail_wrong_order(last_train_timestamp, last_test_timestamp, freq): + with pytest.raises(ValueError, match="Last train timestamp should be less than last test timestamp"): + _ = determine_num_steps_to_forecast( + last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq + ) + + +@pytest.mark.parametrize( + "last_train_timestamp, last_test_timestamp, freq", + [ + (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-06-01"), "M"), + (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-06-01"), "MS"), + ], +) +def test_determine_num_steps_to_forecast_fail_wrong_start(last_train_timestamp, last_test_timestamp, freq): + with pytest.raises(ValueError, match="Last train timestamp isn't correct according to given frequency"): + _ = determine_num_steps_to_forecast( + last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq + ) + + +@pytest.mark.parametrize( + "last_train_timestamp, last_test_timestamp, freq", + [ + (pd.Timestamp("2020-01-31"), pd.Timestamp("2020-06-05"), "M"), + (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-06-05"), "MS"), + ], +) +def test_determine_num_steps_to_forecast_fail_wrong_end(last_train_timestamp, 
last_test_timestamp, freq): + with pytest.raises(ValueError, match="Last test timestamps isn't reachable with freq"): + _ = determine_num_steps_to_forecast( + last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq + ) From fa90f48b661be85f5f0ec4c002855cf736fafde7 Mon Sep 17 00:00:00 2001 From: "d.a.bunin" Date: Fri, 22 Jul 2022 10:48:51 +0300 Subject: [PATCH 9/9] Remove changing SARIMAX --- etna/models/sarimax.py | 2 +- tests/test_models/test_inference.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/etna/models/sarimax.py b/etna/models/sarimax.py index a26263317..a8c3e6324 100644 --- a/etna/models/sarimax.py +++ b/etna/models/sarimax.py @@ -274,7 +274,7 @@ def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Sequen y_pred[f"mean_{quantile:.4g}"] = series else: forecast = self._result.get_prediction( - start=df["timestamp"].min(), end=df["timestamp"].max(), dynamic=False, exog=exog_future + start=df["timestamp"].min(), end=df["timestamp"].max(), dynamic=True, exog=exog_future ) y_pred = forecast.predicted_mean y_pred.name = "mean" diff --git a/tests/test_models/test_inference.py b/tests/test_models/test_inference.py index 0aa9e89ab..99e162d8b 100644 --- a/tests/test_models/test_inference.py +++ b/tests/test_models/test_inference.py @@ -418,7 +418,6 @@ def test_forecast_out_sample_suffix_failed(model, transforms, example_tsds): (ElasticPerSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]), (ElasticMultiSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]), (ProphetModel(), []), - (SARIMAXModel(), []), (HoltModel(), []), (HoltWintersModel(), []), (SimpleExpSmoothingModel(), []), @@ -432,6 +431,7 @@ def test_forecast_mixed_in_out_sample(model, transforms, example_tsds): @pytest.mark.parametrize( "model, transforms", [ + (SARIMAXModel(), []), (AutoARIMAModel(), []), ( DeepARModel(max_epochs=5, learning_rate=[0.01]),