diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1814326b9..00a791d37 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -44,7 +44,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 -
 -
 -
--
+- Teach BATS/TBATS to work with in-sample and out-of-sample predictions correctly ([#806](https://github.com/tinkoff-ai/etna/pull/806))
 -
 - Github actions cache issue with poetry update ([#778](https://github.com/tinkoff-ai/etna/pull/778))
 -
diff --git a/etna/models/tbats.py b/etna/models/tbats.py
index 9f51f6223..42631245a 100644
--- a/etna/models/tbats.py
+++ b/etna/models/tbats.py
@@ -11,33 +11,61 @@ from etna.models.base import BaseAdapter
 from etna.models.base import PerSegmentPredictionIntervalModel
+from etna.models.utils import determine_num_steps_to_forecast
 
 
 class _TBATSAdapter(BaseAdapter):
     def __init__(self, model: Estimator):
         self.model = model
         self._fitted_model: Optional[Model] = None
+        self._last_train_timestamp = None
+        self._freq = None
 
     def fit(self, df: pd.DataFrame, regressors: Iterable[str]):
+        freq = pd.infer_freq(df["timestamp"], warn=False)
+        if freq is None:
+            raise ValueError("Can't determine frequency of the given dataframe")
+
         target = df["target"]
         self._fitted_model = self.model.fit(target)
+        self._last_train_timestamp = df["timestamp"].max()
+        self._freq = freq
+
         return self
 
     def predict(self, df: pd.DataFrame, prediction_interval: bool, quantiles: Iterable[float]) -> pd.DataFrame:
-        if self._fitted_model is None:
+        if self._fitted_model is None or self._freq is None:
             raise ValueError("Model is not fitted! Fit the model before calling predict method!")
+
+        if df["timestamp"].min() <= self._last_train_timestamp:
+            raise NotImplementedError(
+                "It is not possible to make in-sample predictions with BATS/TBATS model! "
+                "In-sample predictions aren't supported by the current implementation."
+            )
+
+        steps_to_forecast = determine_num_steps_to_forecast(
+            last_train_timestamp=self._last_train_timestamp, last_test_timestamp=df["timestamp"].max(), freq=self._freq
+        )
+        steps_to_skip = steps_to_forecast - df.shape[0]
+
         y_pred = pd.DataFrame()
         if prediction_interval:
             for quantile in quantiles:
-                pred, confidence_intervals = self._fitted_model.forecast(steps=df.shape[0], confidence_level=quantile)
+                pred, confidence_intervals = self._fitted_model.forecast(
+                    steps=steps_to_forecast, confidence_level=quantile
+                )
                 y_pred["target"] = pred
                 if quantile < 1 / 2:
                     y_pred[f"target_{quantile:.4g}"] = confidence_intervals["lower_bound"]
                 else:
                     y_pred[f"target_{quantile:.4g}"] = confidence_intervals["upper_bound"]
         else:
-            pred = self._fitted_model.forecast(steps=df.shape[0])
+            pred = self._fitted_model.forecast(steps=steps_to_forecast)
             y_pred["target"] = pred
+
+        # drop forecasts that fall into the gap before the first requested timestamp
+        y_pred = y_pred.iloc[steps_to_skip:].reset_index(drop=True)
+
         return y_pred
 
     def get_model(self) -> Estimator:
diff --git a/etna/models/utils.py b/etna/models/utils.py
new file mode 100644
index 000000000..49cb5bb93
--- /dev/null
+++ b/etna/models/utils.py
@@ -0,0 +1,51 @@
+import pandas as pd
+
+
+def determine_num_steps_to_forecast(
+    last_train_timestamp: pd.Timestamp, last_test_timestamp: pd.Timestamp, freq: str
+) -> int:
+    """Determine the number of steps needed to forecast into the future.
+
+    It is useful for out-of-sample forecasting with a gap when the model can predict only a certain number of
+    steps ahead in an autoregressive manner.
+
+    Parameters
+    ----------
+    last_train_timestamp:
+        last timestamp in train data
+    last_test_timestamp:
+        last timestamp in test data, should be after ``last_train_timestamp``
+    freq:
+        pandas frequency string: `Offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_
+
+    Returns
+    -------
+    :
+        number of steps
+
+    Raises
+    ------
+    ValueError:
+        Last test timestamp is less than or equal to the last train timestamp
+    ValueError:
+        Last train timestamp isn't correct according to the given frequency
+    ValueError:
+        Last test timestamp isn't reachable with the given frequency
+    """
+    if last_test_timestamp <= last_train_timestamp:
+        raise ValueError("Last train timestamp should be less than last test timestamp!")
+
+    # check that last_train_timestamp is aligned to the grid of the given frequency
+    normalized_last_train_timestamp = pd.date_range(start=last_train_timestamp, periods=1, freq=freq)
+    if normalized_last_train_timestamp != last_train_timestamp:
+        raise ValueError(f"Last train timestamp isn't correct according to the given frequency: {freq}")
+
+    # use linear probing, because for complex offsets the number of steps can't be computed by simple arithmetic
+    cur_value = 1
+    while True:
+        timestamps = pd.date_range(start=last_train_timestamp, periods=cur_value + 1, freq=freq)
+        if timestamps[-1] == last_test_timestamp:
+            return cur_value
+        elif timestamps[-1] > last_test_timestamp:
+            raise ValueError(f"Last test timestamp isn't reachable with freq: {freq}")
+        cur_value += 1
diff --git a/tests/test_models/test_inference.py b/tests/test_models/test_inference.py
index 9d9285ea3..99e162d8b 100644
--- a/tests/test_models/test_inference.py
+++ b/tests/test_models/test_inference.py
@@ -57,7 +57,7 @@ def _test_forecast_in_sample_suffix(ts, model, transforms):
     forecast_ts = TSDataset(df, freq="D")
     forecast_ts.transform(ts.transforms)
     forecast_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN
-    forecast_ts.df = forecast_ts.df.iloc[5:]
+    forecast_ts.df = forecast_ts.df.iloc[6:]
     model.forecast(forecast_ts)
 
     # checking
@@ -105,6 +105,41 @@ def _test_forecast_out_sample_suffix(ts, model, transforms):
     assert_frame_equal(forecast_gap_df, forecast_full_df.iloc[2:])
 
 
+def _test_forecast_mixed_in_out_sample(ts, model, transforms):
+    # fitting
+    df = ts.to_pandas()
+    ts.fit_transform(transforms)
+    model.fit(ts)
+
+    # forecasting mixed in-sample and out-sample
+    future_ts = ts.make_future(5)
+    future_df = future_ts.to_pandas().loc[:, pd.IndexSlice[:, "target"]]
+    df_full = pd.concat((df, future_df))
+    forecast_full_ts = TSDataset(df=df_full, freq=future_ts.freq)
+    forecast_full_ts.transform(ts.transforms)
+    forecast_full_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN
+    forecast_full_ts.df = forecast_full_ts.df.iloc[6:]
+    model.forecast(forecast_full_ts)
+
+    # forecasting only in sample
+    forecast_in_sample_ts = TSDataset(df, freq="D")
+    forecast_in_sample_ts.transform(ts.transforms)
+    forecast_in_sample_ts.df.loc[:, pd.IndexSlice[:, "target"]] = np.NaN
+    forecast_in_sample_ts.df = forecast_in_sample_ts.df.iloc[6:]
+    model.forecast(forecast_in_sample_ts)
+
+    # forecasting only out sample
+    forecast_out_sample_ts = ts.make_future(5)
+    model.forecast(forecast_out_sample_ts)
+
+    # checking
+    forecast_full_df = forecast_full_ts.to_pandas()
+    forecast_in_sample_df = forecast_in_sample_ts.to_pandas()
+    forecast_out_sample_df = forecast_out_sample_ts.to_pandas()
+    assert_frame_equal(forecast_in_sample_df, forecast_full_df.iloc[:-5])
+    assert_frame_equal(forecast_out_sample_df, forecast_full_df.iloc[-5:])
+
+
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -118,8 +153,6 @@ def _test_forecast_out_sample_suffix(ts, model, transforms):
         (MovingAverageModel(window=3), []),
         (NaiveModel(lag=3), []),
         (SeasonalMovingAverageModel(), []),
-        (BATSModel(use_trend=True), []),
-        (TBATSModel(use_trend=True), []),
     ],
 )
 def test_forecast_in_sample_full(model, transforms, example_tsds):
@@ -167,6 +200,18 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds):
     _test_forecast_in_sample_full(example_tsds, model, transforms)
 
 
+@pytest.mark.parametrize(
+    "model, transforms",
+    [
+        (BATSModel(use_trend=True), []),
+        (TBATSModel(use_trend=True), []),
+    ],
+)
+def test_forecast_in_sample_full_not_implemented(model, transforms, example_tsds):
+    with pytest.raises(NotImplementedError, match="It is not possible to make in-sample predictions"):
+        _test_forecast_in_sample_full(example_tsds, model, transforms)
+
+
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -185,8 +230,6 @@ def test_forecast_in_sample_full_failed(model, transforms, example_tsds):
         (MovingAverageModel(window=3), []),
         (NaiveModel(lag=3), []),
         (SeasonalMovingAverageModel(), []),
-        (BATSModel(use_trend=True), []),
-        (TBATSModel(use_trend=True), []),
     ],
 )
 def test_forecast_in_sample_suffix(model, transforms, example_tsds):
@@ -229,6 +272,18 @@ def test_forecast_in_sample_suffix_failed(model, transforms, example_tsds):
     _test_forecast_in_sample_suffix(example_tsds, model, transforms)
 
 
+@pytest.mark.parametrize(
+    "model, transforms",
+    [
+        (BATSModel(use_trend=True), []),
+        (TBATSModel(use_trend=True), []),
+    ],
+)
+def test_forecast_in_sample_suffix_not_implemented(model, transforms, example_tsds):
+    with pytest.raises(NotImplementedError, match="It is not possible to make in-sample predictions"):
+        _test_forecast_in_sample_suffix(example_tsds, model, transforms)
+
+
 @pytest.mark.parametrize(
     "model, transforms",
     [
@@ -305,6 +360,8 @@ def test_forecast_out_sample_prefix_failed(model, transforms, example_tsds):
         (HoltModel(), []),
         (HoltWintersModel(), []),
         (SimpleExpSmoothingModel(), []),
+        (BATSModel(use_trend=True), []),
+        (TBATSModel(use_trend=True), []),
         (
             TFTModel(max_epochs=1, learning_rate=[0.01]),
             [
@@ -333,8 +390,6 @@ def test_forecast_out_sample_suffix(model, transforms, example_tsds):
         (MovingAverageModel(window=3), []),
         (SeasonalMovingAverageModel(), []),
         (NaiveModel(lag=3), []),
-        (BATSModel(use_trend=True), []),
-        (TBATSModel(use_trend=True), []),
         (
             DeepARModel(max_epochs=5, learning_rate=[0.01]),
             [
@@ -351,3 +406,72 @@ def test_forecast_out_sample_suffix_failed(model, transforms, example_tsds):
 )
 def test_forecast_out_sample_suffix_failed(model, transforms, example_tsds):
     _test_forecast_out_sample_suffix(example_tsds, model, transforms)
+
+
+@pytest.mark.parametrize(
+    "model, transforms",
+    [
+        (CatBoostModelPerSegment(), [LagTransform(in_column="target", lags=[5, 6])]),
+        (CatBoostModelMultiSegment(), [LagTransform(in_column="target", lags=[5, 6])]),
+        (LinearPerSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]),
+        (LinearMultiSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]),
+        (ElasticPerSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]),
+        (ElasticMultiSegmentModel(), [LagTransform(in_column="target", lags=[5, 6])]),
+        (ProphetModel(), []),
+        (HoltModel(), []),
+        (HoltWintersModel(), []),
+        (SimpleExpSmoothingModel(), []),
+    ],
+)
+def test_forecast_mixed_in_out_sample(model, transforms, example_tsds):
+    _test_forecast_mixed_in_out_sample(example_tsds, model, transforms)
+
+
+@pytest.mark.xfail(strict=True)
+@pytest.mark.parametrize(
+    "model, transforms",
+    [
+        (SARIMAXModel(), []),
+        (AutoARIMAModel(), []),
+        (
+            DeepARModel(max_epochs=5, learning_rate=[0.01]),
+            [
+                PytorchForecastingTransform(
+                    max_encoder_length=5,
+                    max_prediction_length=5,
+                    time_varying_known_reals=["time_idx"],
+                    time_varying_unknown_reals=["target"],
+                    target_normalizer=GroupNormalizer(groups=["segment"]),
+                )
+            ],
+        ),
+        (
+            TFTModel(max_epochs=1, learning_rate=[0.01]),
+            [
+                PytorchForecastingTransform(
+                    max_encoder_length=21,
+                    min_encoder_length=21,
+                    max_prediction_length=5,
+                    time_varying_known_reals=["time_idx"],
+                    time_varying_unknown_reals=["target"],
+                    static_categoricals=["segment"],
+                    target_normalizer=None,
+                )
+            ],
+        ),
+    ],
+)
+def test_forecast_mixed_in_out_sample_failed(model, transforms, example_tsds):
+    _test_forecast_mixed_in_out_sample(example_tsds, model, transforms)
+
+
+@pytest.mark.parametrize(
+    "model, transforms",
+    [
+        (BATSModel(use_trend=True), []),
+        (TBATSModel(use_trend=True), []),
+    ],
+)
+def test_forecast_mixed_in_out_sample_not_implemented(model, transforms, example_tsds):
+    with pytest.raises(NotImplementedError, match="It is not possible to make in-sample predictions"):
+        _test_forecast_mixed_in_out_sample(example_tsds, model, transforms)
diff --git a/tests/test_models/test_tbats.py b/tests/test_models/test_tbats.py
index b0316b91c..5ccdb4409 100644
--- a/tests/test_models/test_tbats.py
+++ b/tests/test_models/test_tbats.py
@@ -110,7 +110,8 @@ def test_dummy(model, sinusoid_ts):
 @pytest.mark.parametrize("model", [TBATSModel(), BATSModel()])
 def test_prediction_interval(model, example_tsds):
     model.fit(example_tsds)
-    forecast = model.forecast(example_tsds, prediction_interval=True, quantiles=[0.025, 0.975])
+    future_ts = example_tsds.make_future(3)
+    forecast = model.forecast(future_ts, prediction_interval=True, quantiles=[0.025, 0.975])
     for segment in forecast.segments:
         segment_slice = forecast[:, segment, :][segment]
         assert {"target_0.025", "target_0.975", "target"}.issubset(segment_slice.columns)
diff --git a/tests/test_models/test_utils.py b/tests/test_models/test_utils.py
new file mode 100644
index 000000000..d269a5b99
--- /dev/null
+++ b/tests/test_models/test_utils.py
@@ -0,0 +1,64 @@
+import pandas as pd
+import pytest
+
+from etna.models.utils import determine_num_steps_to_forecast
+
+
+@pytest.mark.parametrize(
+    "last_train_timestamp, last_test_timestamp, freq, answer",
+    [
+        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02"), "D", 1),
+        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-11"), "D", 10),
+        (pd.Timestamp("2020-01-05"), pd.Timestamp("2020-01-19"), "W-SUN", 2),
+        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-15"), pd.offsets.Week(), 2),
+        (pd.Timestamp("2020-01-31"), pd.Timestamp("2021-02-28"), "M", 13),
+        (pd.Timestamp("2020-01-01"), pd.Timestamp("2021-06-01"), "MS", 17),
+    ],
+)
+def test_determine_num_steps_to_forecast_ok(last_train_timestamp, last_test_timestamp, freq, answer):
+    result = determine_num_steps_to_forecast(
+        last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq
+    )
+    assert result == answer
+
+
+@pytest.mark.parametrize(
+    "last_train_timestamp, last_test_timestamp, freq",
+    [
+        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-01"), "D"),
+        (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-01-01"), "D"),
+    ],
+)
+def test_determine_num_steps_to_forecast_fail_wrong_order(last_train_timestamp, last_test_timestamp, freq):
+    with pytest.raises(ValueError, match="Last train timestamp should be less than last test timestamp"):
+        _ = determine_num_steps_to_forecast(
+            last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq
+        )
+
+
+@pytest.mark.parametrize(
+    "last_train_timestamp, last_test_timestamp, freq",
+    [
+        (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-06-01"), "M"),
+        (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-06-01"), "MS"),
+    ],
+)
+def test_determine_num_steps_to_forecast_fail_wrong_start(last_train_timestamp, last_test_timestamp, freq):
+    with pytest.raises(ValueError, match="Last train timestamp isn't correct according to the given frequency"):
+        _ = determine_num_steps_to_forecast(
+            last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq
+        )
+
+
+@pytest.mark.parametrize(
+    "last_train_timestamp, last_test_timestamp, freq",
+    [
+        (pd.Timestamp("2020-01-31"), pd.Timestamp("2020-06-05"), "M"),
+        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-06-05"), "MS"),
+    ],
+)
+def test_determine_num_steps_to_forecast_fail_wrong_end(last_train_timestamp, last_test_timestamp, freq):
+    with pytest.raises(ValueError, match="Last test timestamp isn't reachable with freq"):
+        _ = determine_num_steps_to_forecast(
+            last_train_timestamp=last_train_timestamp, last_test_timestamp=last_test_timestamp, freq=freq
+        )
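
For reference, a minimal sketch of how the new helper drives the gap handling in `_TBATSAdapter.predict`: the model rolls forward through the whole horizon, and the predictions that fall into the gap are dropped. The timestamps below are made up for illustration; only pandas and the `etna.models.utils` module introduced in this diff are assumed.

    import pandas as pd

    from etna.models.utils import determine_num_steps_to_forecast

    # train ends at 2020-01-10; predictions are requested for 2020-01-13..2020-01-15,
    # i.e. there is a gap of two days after the end of train
    last_train_timestamp = pd.Timestamp("2020-01-10")
    requested_timestamps = pd.date_range(start="2020-01-13", end="2020-01-15", freq="D")

    # the model has to forecast through the gap, so it makes 5 autoregressive steps...
    steps_to_forecast = determine_num_steps_to_forecast(
        last_train_timestamp=last_train_timestamp,
        last_test_timestamp=requested_timestamps.max(),
        freq="D",
    )
    # ...and the first 2 predictions (the gap) are dropped before the result is returned
    steps_to_skip = steps_to_forecast - len(requested_timestamps)

    assert steps_to_forecast == 5
    assert steps_to_skip == 2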