Skip to content

Implement forecast decomposition for SMA-based models #1180

Merged
merged 17 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Added
- Forecast decomposition for `SeasonalMovingAverageModel` ([#1180](https://github.com/tinkoff-ai/etna/pull/1180))
- Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))
- Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
- Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))
Expand All @@ -29,7 +30,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add optional parameter `ts` into `forecast` method of pipelines ([#1071](https://github.com/tinkoff-ai/etna/pull/1071))
- Add tests on `transform` method of transforms on subset of segments, on new segments, on future with gap ([#1094](https://github.com/tinkoff-ai/etna/pull/1094))
- Add tests on `inverse_transform` method of transforms on subset of segments, on new segments, on future with gap ([#1127](https://github.com/tinkoff-ai/etna/pull/1127))
-
### Changed
- Add optional `features` parameter in the signature of `TSDataset.to_pandas`, `TSDataset.to_flatten` ([#809](https://github.com/tinkoff-ai/etna/pull/809))
- Signature of the constructor of `TFTModel`, `DeepARModel` ([#1110](https://github.com/tinkoff-ai/etna/pull/1110))
Expand Down
2 changes: 1 addition & 1 deletion etna/datasets/tsdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -1166,7 +1166,7 @@ def add_target_components(self, target_components_df: pd.DataFrame):
)

components_sum = target_components_df.sum(axis=1, level="segment")
if not np.array_equal(components_sum.values, self[..., "target"].values):
if not np.allclose(components_sum.values, self[..., "target"].values):
raise ValueError("Components don't sum up to target!")

self._target_components_names = components_names
Expand Down
57 changes: 39 additions & 18 deletions etna/models/seasonal_ma.py
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,27 @@ def _validate_context(self, df: pd.DataFrame, prediction_size: int):
"Given context isn't big enough, try to decrease context_size, prediction_size or increase length of given dataframe!"
)

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
def _predict_components(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
    """Estimate forecast components.

    One component is produced per seasonal lag: the target shifted by that lag
    and divided by ``self.window``.

    Parameters
    ----------
    df:
        Wide dataframe whose columns are a ("segment", "feature") MultiIndex
        containing a "target" column for every segment. The rows preceding each
        predicted timestamp must hold the values the prediction was built from;
        ``forecast`` writes its autoregressive predictions back into the frame
        before calling this method for exactly that reason.
    prediction_size:
        Number of trailing rows of ``df`` to return components for.

    Returns
    -------
    :
        Dataframe with the last ``prediction_size`` rows and one
        ``target_component_lag_<lag>`` column per segment and lag.

    Notes
    -----
    The context is checked with ``self._validate_context`` first; whatever that
    helper raises propagates unchanged.
    Presumably ``context_size == window * seasonality``, so the components of a
    row sum to the model's prediction — confirm against the constructor.
    """
    self._validate_context(df=df, prediction_size=prediction_size)

    lags = list(range(self.seasonality, self.context_size + 1, self.seasonality))

    target = df.loc[:, pd.IndexSlice[:, "target"]]
    # Label components with the segments in the order they actually appear in
    # ``target``; relabelling with a sorted list would attach values to the
    # wrong segment whenever the frame's columns are not already sorted.
    target_segments = target.columns.get_level_values("segment")

    all_components = []
    for lag in lags:
        component = target.shift(lag)
        component.columns = pd.MultiIndex.from_arrays(
            [target_segments, [f"target_component_lag_{lag}"] * len(target_segments)],
            names=("segment", "feature"),
        )
        all_components.append(component)

    target_components_df = pd.concat(all_components, axis=1) / self.window
    return target_components_df.iloc[-prediction_size:]

def _forecast(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
"""Make autoregressive forecasts on a wide dataframe."""
self._validate_context(df=df, prediction_size=prediction_size)

Expand All @@ -96,10 +116,8 @@ def _forecast(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
for i in range(self.context_size, len(res)):
res[i] = res[i - self.context_size : i : self.seasonality].mean(axis=0)

df = df.iloc[-prediction_size:]
y_pred = res[-prediction_size:]
df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
return df
return y_pred

def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
"""Make autoregressive forecasts.
Expand Down Expand Up @@ -128,15 +146,18 @@ def forecast(self, ts: TSDataset, prediction_size: int, return_components: bool
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._forecast(df=df, prediction_size=prediction_size)
ts.df = new_df
y_pred = self._forecast(df=df, prediction_size=prediction_size)
ts.df = ts.df.iloc[-prediction_size:]
ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred

if return_components:
df.loc[-prediction_size:, pd.IndexSlice[:, "target"]] = y_pred
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still use `df.loc[-prediction_size:, pd.IndexSlice[:, "target"]]` here; this probably won't work on all pandas versions (we get an error when testing under different pandas versions).

The `-prediction_size:` slice implies that the index is integer-based, but it isn't.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, missed this place, you are right. Fixed it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we should make it clearer why we write `y_pred` into `df` inside `forecast`, because the autoregression logic lives in `_forecast` and isn't visible at the level of `forecast`.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or we should make it clearer what kind of `df` we are going to use inside `_predict_components`. We require that it contains the lags that were used to make a prediction (taking auto-regression into account), which isn't clear.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a docstring to `_predict_components` with a description of `df`.

target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
ts.add_target_components(target_components_df=target_components_df)
return ts

def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
def _predict(self, df: pd.DataFrame, prediction_size: int) -> np.ndarray:
"""Make predictions on a wide dataframe using true values as autoregression context."""
self._validate_context(df=df, prediction_size=prediction_size)

Expand All @@ -151,10 +172,8 @@ def _predict(self, df: pd.DataFrame, prediction_size: int) -> pd.DataFrame:
for res_idx, context_idx in enumerate(range(self.context_size, len(context))):
res[res_idx] = context[context_idx - self.context_size : context_idx : self.seasonality].mean(axis=0)

df = df.iloc[-prediction_size:]
y_pred = res[-prediction_size:]
df.loc[:, pd.IndexSlice[:, "target"]] = y_pred
return df
return y_pred

def predict(self, ts: TSDataset, prediction_size: int, return_components: bool = False) -> TSDataset:
"""Make predictions using true values as autoregression context (teacher forcing).
Expand Down Expand Up @@ -183,12 +202,14 @@ def predict(self, ts: TSDataset, prediction_size: int, return_components: bool =
ValueError:
if forecast context contains NaNs
"""
if return_components:
raise NotImplementedError("This mode isn't currently implemented!")

df = ts.to_pandas()
new_df = self._predict(df=df, prediction_size=prediction_size)
ts.df = new_df
y_pred = self._predict(df=df, prediction_size=prediction_size)
ts.df = ts.df.iloc[-prediction_size:]
ts.df.loc[:, pd.IndexSlice[:, "target"]] = y_pred

if return_components:
target_components_df = self._predict_components(df=df, prediction_size=prediction_size)
Mr-Geekman marked this conversation as resolved.
Show resolved Hide resolved
ts.add_target_components(target_components_df=target_components_df)
return ts


Expand Down
2 changes: 1 addition & 1 deletion etna/transforms/math/differencing.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,7 @@ def _fit(self, df: pd.DataFrame) -> "DifferencingTransform":
if NaNs are present inside the segment
"""
# this is made because transforms of high order may need some columns created by transforms of lower order
result_df = df.copy()
result_df = df
for transform in self._differencing_transforms:
result_df = transform._fit_transform(result_df)
self._fit_segments = df.columns.get_level_values("segment").unique().tolist()
Expand Down
2 changes: 1 addition & 1 deletion etna/transforms/math/lags.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
result: pd.Dataframe
transformed dataframe
"""
result = df.copy()
result = df
segments = sorted(set(df.columns.get_level_values("segment")))
all_transformed_features = []
features = df.loc[:, pd.IndexSlice[:, self.in_column]]
Expand Down
8 changes: 4 additions & 4 deletions etna/transforms/math/scalers.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def __init__(
self.with_std = with_std
super().__init__(
in_column=in_column,
transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=True),
transformer=StandardScaler(with_mean=self.with_mean, with_std=self.with_std, copy=False),
out_column=out_column,
inplace=inplace,
mode=mode,
Expand Down Expand Up @@ -140,7 +140,7 @@ def __init__(
with_scaling=self.with_scaling,
quantile_range=self.quantile_range,
unit_variance=self.unit_variance,
copy=True,
copy=False,
),
mode=mode,
)
Expand Down Expand Up @@ -199,7 +199,7 @@ def __init__(
in_column=in_column,
inplace=inplace,
out_column=out_column,
transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=True),
transformer=MinMaxScaler(feature_range=self.feature_range, clip=self.clip, copy=False),
mode=mode,
)

Expand Down Expand Up @@ -248,7 +248,7 @@ def __init__(
in_column=in_column,
inplace=inplace,
out_column=out_column,
transformer=MaxAbsScaler(copy=True),
transformer=MaxAbsScaler(copy=False),
mode=mode,
)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_datasets/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def inconsistent_target_components_names_duplication_df(target_components_df):

@pytest.fixture
def inconsistent_target_components_values_df(target_components_df):
    """Return ``target_components_df`` with one component value overwritten by 100.

    The value of ``target_component_a`` for segment "1" in the last row is
    replaced, presumably so that the components no longer sum up to the target.
    """
    # Address the row through the index's own last label instead of a hard-coded
    # integer: an integer label is not guaranteed to exist in the index
    # (NOTE(review): the index looks timestamp-based — confirm).
    last_label = target_components_df.index[-1]
    target_components_df.loc[last_label, pd.IndexSlice["1", "target_component_a"]] = 100
    return target_components_df


Expand Down
45 changes: 45 additions & 0 deletions tests/test_models/test_simple_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,3 +729,48 @@ def test_deadline_model_forecast_correct_with_big_horizons(two_month_ts):
)
def test_save_load(model, example_tsds):
assert_model_equals_loaded_original(model=model, ts=example_tsds, transforms=[], horizon=3)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize(
    "window, seasonality, expected_components_names",
    ((1, 7, ["target_component_lag_7"]), (2, 7, ["target_component_lag_7", "target_component_lag_14"])),
)
def test_sma_model_predict_components_correct_names(
    example_tsds, method_name, window, seasonality, expected_components_names, horizon=10
):
    """Check that the returned component names match the expected seasonal lags."""
    sma_model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
    sma_model.fit(example_tsds)

    # ``method_name`` selects either ``forecast`` or ``predict`` on the fitted model.
    result_ts = getattr(sma_model, method_name)(
        ts=example_tsds, prediction_size=horizon, return_components=True
    )

    actual_names = sorted(result_ts.target_components_names)
    assert actual_names == sorted(expected_components_names)


@pytest.mark.parametrize("method_name", ("forecast", "predict"))
@pytest.mark.parametrize("window", (1, 3, 5))
@pytest.mark.parametrize("seasonality", (1, 7, 14))
def test_sma_model_predict_components_sum_up_to_target(example_tsds, method_name, window, seasonality, horizon=10):
    """Check that, per segment, the returned components add up to the predicted target."""
    sma_model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
    sma_model.fit(example_tsds)

    # ``method_name`` selects either ``forecast`` or ``predict`` on the fitted model.
    result_ts = getattr(sma_model, method_name)(
        ts=example_tsds, prediction_size=horizon, return_components=True
    )

    predicted_target = result_ts.to_pandas(features=["target"])
    components = result_ts.get_target_components()
    components_total = components.sum(axis=1, level="segment")
    np.testing.assert_allclose(predicted_target.values, components_total.values)


@pytest.mark.parametrize(
    "method_name, expected_values",
    (("forecast", [[44, 4], [45, 6], [44, 4]]), ("predict", [[44, 4], [45, 6], [46, 8]])),
)
def test_sma_model_predict_components_correct(
    simple_df, method_name, expected_values, window=1, seasonality=2, horizon=3
):
    """Check the exact component values returned by ``forecast`` and ``predict``."""
    sma_model = SeasonalMovingAverageModel(window=window, seasonality=seasonality)
    sma_model.fit(simple_df)

    # ``method_name`` selects either ``forecast`` or ``predict`` on the fitted model.
    result_ts = getattr(sma_model, method_name)(
        ts=simple_df, prediction_size=horizon, return_components=True
    )

    components = result_ts.get_target_components()
    np.testing.assert_allclose(components.values, expected_values)