Implement forecast decomposition for SMA-based models #1180

Merged
merged 17 commits on Mar 29, 2023
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Added
- Forecast decomposition for `SeasonalMovingAverageModel` ([#1180](https://github.com/tinkoff-ai/etna/pull/1180))
- Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))
- Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
- Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))
@@ -29,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add optional parameter `ts` into `forecast` method of pipelines ([#1071](https://github.com/tinkoff-ai/etna/pull/1071))
- Add tests on `transform` method of transforms on subset of segments, on new segments, on future with gap ([#1094](https://github.com/tinkoff-ai/etna/pull/1094))
- Add tests on `inverse_transform` method of transforms on subset of segments, on new segments, on future with gap ([#1127](https://github.com/tinkoff-ai/etna/pull/1127))
-
### Changed
- Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809))
- Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
2 changes: 1 addition & 1 deletion etna/datasets/tsdataset.py
@@ -1166,7 +1166,7 @@ def add_target_components(self, target_components_df: pd.DataFrame):
)

components_sum = target_components_df.sum(axis=1, level="segment")
if not np.array_equal(components_sum.values, self[..., "target"].values):
if not np.allclose(components_sum.values, self[..., "target"].values):
raise ValueError("Components don't sum up to target!")

self._target_components_names = components_names
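For context on why the check moves from `np.array_equal` to `np.allclose`: the components are built by dividing target lags by the window, so their sum can differ from the stored target by floating-point round-off. A minimal sketch with toy numbers (not taken from the PR):

```python
import numpy as np

# Exact equality rejects an otherwise valid decomposition because of round-off.
target = np.array([0.3, 0.6])
components_sum = np.array([0.1, 0.2]) + np.array([0.2, 0.4])  # 0.1 + 0.2 != 0.3 exactly

print(np.array_equal(components_sum, target))  # False: differs in the last bits
print(np.allclose(components_sum, target))     # True: equal within default tolerances
```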
5 changes: 5 additions & 0 deletions etna/models/moving_average.py
@@ -8,6 +8,11 @@ class MovingAverageModel(SeasonalMovingAverageModel):
y_{t} = \\frac{\\sum_{i=1}^{n} y_{t-i} }{n},

where :math:`n` is window size.

Notes
-----
This model supports in-sample and out-of-sample prediction decomposition.
A Contributor commented:

> This model supports in-sample and out-of-sample prediction decomposition.

I'm not sure that this is an implementation detail that should be in Notes.

> Prediction components are corresponding target lags with weights of 1/window

Maybe it would be better to write :math:`1 / window`.

Prediction components are corresponding target lags with weights of 1/window.
"""

def __init__(self, window: int = 5):
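To make the new Notes concrete: for `MovingAverageModel` each component is a target lag scaled by `1 / window`, and the components add back up to the forecast. A toy numpy sketch (the `lag_i` names are shorthand here; the actual components are named `target_component_lag_{lag}` as in `seasonal_ma.py`):

```python
import numpy as np

# Window-3 moving average: forecast = mean of the last 3 values,
# components = those same values, each weighted by 1 / window.
window = 3
history = np.array([10.0, 12.0, 11.0, 13.0, 12.0])

forecast = history[-window:].mean()
components = {f"lag_{i}": history[-i] / window for i in range(1, window + 1)}

assert np.isclose(sum(components.values()), forecast)
print(forecast, components)  # 12.0 {'lag_1': 4.0, 'lag_2': 4.33..., 'lag_3': 3.66...}
```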
5 changes: 5 additions & 0 deletions etna/models/naive.py
@@ -8,6 +8,11 @@ class NaiveModel(SeasonalMovingAverageModel):
y_{t} = y_{t-s},

where :math:`s` is lag.

Notes
-----
This model supports in-sample and out-of-sample prediction decomposition.
Prediction component here is the corresponding target lag.
"""

def __init__(self, lag: int = 1):
78 changes: 60 additions & 18 deletions etna/models/seasonal_ma.py
@@ -16,6 +16,11 @@ class SeasonalMovingAverageModel(
y_{t} = \\frac{\\sum_{i=1}^{n} y_{t-is} }{n},

where :math:`s` is seasonality, :math:`n` is window size (how many history values are taken for forecast).

Notes
-----
This model supports in-sample and out-of-sample prediction decomposition.
Prediction components are corresponding target lags with weights of 1/window.
"""

def __init__(self, window: int = 5, seasonality: int = 7):
@@ -81,7 +86,41 @@ def _validate_context(self, df: pd.DataFrame, prediction_size: int):
"Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataframe!"
)

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
def _predict_components(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
"""Estimate forecast components.

Parameters
----------
df:
DataFrame with target, containing lags that were used to make a prediction
prediction_size:
Number of last timestamps to leave after making prediction.
Previous timestamps will be used as a context.

Returns
-------
:
DataFrame with target components
"""
self._validate_context(df=df, prediction_size=prediction_size)

all_transformed_features = []
segments = sorted(set(df.columns.get_level_values("segment")))
lags = list(range(self.seasonality, self.context_size + 1, self.seasonality))

target = df.loc[:, pd.IndexSlice[:, "target"]]
for lag in lags:
transformed_features = target.shift(lag)
transformed_features.columns = pd.MultiIndex.from_product(
[segments, [f"target_component_lag_{lag}"]], names=("segment", "feature")
)
all_transformed_features.append(transformed_features)

target_components_df = pd.concat(all_transformed_features, axis=1) / self.window
target_components_df = target_components_df.iloc[-prediction_size:]
return target_components_df

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
"""Make autoregressive forecasts on a wide dataframe."""
self._validate_context(df=df, prediction_size=prediction_size)

@@ -96,10 +135,8 @@ def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
for i in range(self.context_size, len(res)):
res[i] = res[i - self.context_size : i : self.seasonality].mean(axis=0)

df = df.iloc[-prediction_size:]
y_pred = res[-prediction_size:]
df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
return df
return y_pred

def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
"""Make autoregressive forecasts.
@@ -128,15 +165,19 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._forecast(df=df, prediction_size=prediction_size)
ts.df = new_df
y_pred = self._forecast(df=df, prediction_size=prediction_size)
ts.df = ts.df.iloc[-prediction_size:]
ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred

if return_components:
# We use predicted targets as lags in autoregressive style
df.loc[df.index[-prediction_size:], pd.IndexSlice[:, "target"]] = y_pred
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
ts.add_target_components(target_components_df=target_components_df)
return ts

def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
def _predict(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
"""Make predictions on a wide dataframe using true values as autoregression context."""
self._validate_context(df=df, prediction_size=prediction_size)

Expand All @@ -151,10 +192,8 @@ def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
for res_idx, context_idx in enumerate(range(self.context_size, len(context))):
res[res_idx] = context[context_idx - self.context_size : context_idx : self.seasonality].mean(axis=0)

df = df.iloc[-prediction_size:]
y_pred = res[-prediction_size:]
df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
return df
return y_pred

def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
"""Make predictions using true values as autoregression context (teacher forcing).
@@ -183,12 +222,15 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool =
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._predict(df=df, prediction_size=prediction_size)
ts.df = new_df
y_pred = self._predict(df=df, prediction_size=prediction_size)
ts.df = ts.df.iloc[-prediction_size:]
ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred

if return_components:
# We use true targets as lags
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
ts.add_target_components(target_components_df=target_components_df)
return ts


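To see the whole `return_components` flow end to end: `forecast` fills the horizon autoregressively and then reuses its own predictions as lags for the components, while `predict` uses the true targets as lags (teacher forcing); in both cases `_predict_components` shifts the target by each seasonal lag and divides by the window. A usage sketch on synthetic data, mirroring the tests below (it assumes the usual `generate_ar_df` and `TSDataset.to_dataset` helpers; as in the tests, the last `prediction_size` timestamps of `ts` are treated as the horizon):

```python
import numpy as np
from etna.datasets import TSDataset, generate_ar_df
from etna.models import SeasonalMovingAverageModel

# Synthetic two-segment daily series.
df = generate_ar_df(periods=100, start_time="2021-01-01", n_segments=2)
ts = TSDataset(TSDataset.to_dataset(df), freq="D")

model = SeasonalMovingAverageModel(window=2, seasonality=7)
model.fit(ts)

# forecast: lags beyond the context come from the model's own predictions;
# predict would use the true targets instead.
forecast = model.forecast(ts=ts, prediction_size=10, return_components=True)
components = forecast.get_target_components()

print(sorted(forecast.target_components_names))
# ['target_component_lag_14', 'target_component_lag_7']

# Per-segment sums of the components reproduce the forecasted target,
# which is what TSDataset.add_target_components now verifies with np.allclose.
target = forecast.to_pandas(features=["target"])
np.testing.assert_allclose(target.values, components.sum(axis=1, level="segment").values)
```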
2 changes: 1 addition & 1 deletion etna/transforms/math/differencing.py
@@ -398,7 +398,7 @@ def _fit(self, df: pd.DataFrame) -> "DifferencingTransform":
if NaNs are present inside the segment
"""
# this is made because transforms of high order may need some columns created by transforms of lower order
result_df = df.copy()
result_df = df
for transform in self._differencing_transforms:
result_df = transform._fit_transform(result_df)
self._fit_segments = df.columns.get_level_values("segment").unique().tolist()
2 changes: 1 addition & 1 deletion etna/transforms/math/lags.py
@@ -80,7 +80,7 @@ def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
result: pd.Dataframe
transformed dataframe
"""
result = df.copy()
result = df
segments = sorted(set(df.columns.get_level_values("segment")))
all_transformed_features = []
features = df.loc[:, pd.IndexSlice[:, self.in_column]]
8 changes: 4 additions & 4 deletions etna/transforms/math/scalers.py
@@ -63,7 +63,7 @@ def __init__(
self.with_std = with_std
super().__init__(
in_column=in_column,
transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=True),
transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=False),
out_column=out_column,
inplace=inplace,
mode=mode,
@@ -140,7 +140,7 @@ def __init__(
with_scaling=self.with_scaling,
quantile_range=self.quantile_range,
unit_variance=self.unit_variance,
copy=True,
copy=False,
),
mode=mode,
)
@@ -199,7 +199,7 @@ def __init__(
in_column=in_column,
inplace=inplace,
out_column=out_column,
transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=True),
transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=False),
mode=mode,
)

@@ -248,7 +248,7 @@ def __init__(
in_column=in_column,
inplace=inplace,
out_column=out_column,
transformer=MaxAbsScaler(copy=True),
transformer=MaxAbsScaler(copy=False),
mode=mode,
)

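The scaler changes flip the underlying sklearn transformers from `copy=True` to `copy=False`, presumably to avoid an extra array copy now that the surrounding transforms no longer copy the input dataframe. A minimal sketch of what that means at the sklearn level (toy data; whether the input buffer is actually reused depends on its dtype and layout):

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

# With copy=False, sklearn can scale a contiguous float array in place.
X = np.array([[1.0], [2.0], [3.0]])
X_t = StandardScaler(copy=False).fit_transform(X)

print(np.shares_memory(X, X_t))  # True here: X itself was standardized
print(X.ravel())                 # [-1.2247...,  0.,  1.2247...]
```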
2 changes: 1 addition & 1 deletion tests/test_datasets/test_dataset.py
@@ -224,7 +224,7 @@ def inconsistent_target_components_names_duplication_df(target_components_df):

@pytest.fixture
def inconsistent_target_components_values_df(target_components_df):
target_components_df.loc[10, pd.IndexSlice["1", "target_component_a"]] = 100
target_components_df.loc[target_components_df.index[-1], pd.IndexSlice["1", "target_component_a"]] = 100
return target_components_df


45 changes: 45 additions & 0 deletions tests/test_models/test_simple_models.py
@@ -729,3 +729,48 @@ def test_deadline_model_forecast_correct_with_big_horizons(two_month_ts):
)
def test_save_load(model, example_tsds):
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize(
"window, seasonality, expected_components_names",
((1, 7, ["target_component_lag_7"]), (2, 7, ["target_component_lag_7", "target_component_lag_14"])),
)
def test_sma_model_predict_components_correct_names(
example_tsds, method_name, window, seasonality, expected_components_names, horizon=10
):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(example_tsds)
to_call = getattr(model, method_name)
forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)
assert sorted(forecast.target_components_names) == sorted(expected_components_names)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize("window", (1, 3, 5))
@pytest.mark.parametrize("seasonality", (1, 7, 14))
def test_sma_model_predict_components_sum_up_to_target(example_tsds, method_name, window, seasonality, horizon=10):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(example_tsds)
to_call = getattr(model, method_name)
forecast = to_call(ts=example_tsds, prediction_size=horizon, return_components=True)

target = forecast.to_pandas(features=["target"])
target_components_df = forecast.get_target_components()
np.testing.assert_allclose(target.values, target_components_df.sum(axis=1, level="segment").values)


@pytest.mark.parametrize(
"method_name, expected_values",
(("forecast", [[44, 4], [45, 6], [44, 4]]), ("predict", [[44, 4], [45, 6], [46, 8]])),
)
def test_sma_model_predict_components_correct(
simple_df, method_name, expected_values, window=1, seasonality=2, horizon=3
):
model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
model.fit(simple_df)
to_call = getattr(model, method_name)
forecast = to_call(ts=simple_df, prediction_size=horizon, return_components=True)

target_components_df = forecast.get_target_components()
np.testing.assert_allclose(target_components_df.values, expected_values)