Skip to content

Add target components logic to pipelines #1173

Merged
merged 16 commits into from
Mar 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/docs-on-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true
- name: Load cached venv
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/docs-unstable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true
- name: Load cached venv
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/notebooks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true
- name: Install dependencies
Expand Down
9 changes: 6 additions & 3 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ jobs:

- name: Install Dependencies
run: |
pip install poetry==1.4.0
pip install poetry==1.4.0 # TODO: remove after poetry fix
poetry --version
poetry config virtualenvs.in-project true
poetry install -E style --no-root
Expand All @@ -48,6 +48,7 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down Expand Up @@ -86,6 +87,7 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down Expand Up @@ -123,6 +125,7 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down Expand Up @@ -160,7 +163,7 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down Expand Up @@ -200,7 +203,7 @@ jobs:
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0
version: 1.4.0 # TODO: remove after poetry fix
virtualenvs-create: true
virtualenvs-in-project: true

Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased
### Added
- Target components logic into base classes of pipelines ([#1173](https://github.com/tinkoff-ai/etna/pull/1173))
- Method `predict_components` for forecast decomposition in `_SklearnAdapter` and `_LinearAdapter` for linear models ([#1164](https://github.com/tinkoff-ai/etna/pull/1164))
- Target components logic into base classes of models ([#1158](https://github.com/tinkoff-ai/etna/pull/1158))
- Target components logic to TSDataset ([#1153](https://github.com/tinkoff-ai/etna/pull/1153))
Expand Down
7 changes: 6 additions & 1 deletion etna/ensembles/direct_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,16 @@ def _merge(self, forecasts: List[TSDataset]) -> TSDataset:
forecast_dataset = TSDataset(df=forecast_df, freq=forecasts[0].freq)
return forecast_dataset

def _forecast(self) -> TSDataset:
def _forecast(self, return_components: bool) -> TSDataset:
"""Make predictions.

In each point in the future, forecast of the ensemble is forecast of base pipeline with the shortest horizon,
which covers this point.
"""
if self.ts is None:
raise ValueError("Something went wrong, ts is None!")
if return_components:
raise NotImplementedError("Adding target components is not currently implemented!")

forecasts = Parallel(n_jobs=self.n_jobs, backend="multiprocessing", verbose=11)(
delayed(self._forecast_pipeline)(pipeline=pipeline) for pipeline in self.pipelines
Expand All @@ -144,9 +146,12 @@ def _predict(
end_timestamp: pd.Timestamp,
prediction_interval: bool,
quantiles: Sequence[float],
return_components: bool,
) -> TSDataset:
if prediction_interval:
raise NotImplementedError(f"Ensemble {self.__class__.__name__} doesn't support prediction intervals!")
if return_components:
raise NotImplementedError("Adding target components is not currently implemented!")

horizons = [pipeline.horizon for pipeline in self.pipelines]
pipeline = self.pipelines[np.argmin(horizons)]
Expand Down
8 changes: 7 additions & 1 deletion etna/ensembles/stacking_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,16 @@ def _process_forecasts(self, forecasts: List[TSDataset]) -> TSDataset:
result.loc[pd.IndexSlice[:], pd.IndexSlice[:, "target"]] = y
return result

def _forecast(self) -> TSDataset:
def _forecast(self, return_components: bool) -> TSDataset:
"""Make predictions.

Compute the combination of pipelines' forecasts using ``final_model``
"""
if self.ts is None:
raise ValueError("Something went wrong, ts is None!")
if return_components:
raise NotImplementedError("Adding target components is not currently implemented!")

forecasts = Parallel(n_jobs=self.n_jobs, **self.joblib_params)(
delayed(self._forecast_pipeline)(pipeline=pipeline) for pipeline in self.pipelines
)
Expand All @@ -255,9 +258,12 @@ def _predict(
end_timestamp: pd.Timestamp,
prediction_interval: bool,
quantiles: Sequence[float],
return_components: bool,
) -> TSDataset:
if prediction_interval:
raise NotImplementedError(f"Ensemble {self.__class__.__name__} doesn't support prediction intervals!")
if return_components:
raise NotImplementedError("Adding target components is not currently implemented!")

predictions = Parallel(n_jobs=self.n_jobs, **self.joblib_params)(
delayed(self._predict_pipeline)(
Expand Down
7 changes: 6 additions & 1 deletion etna/ensembles/voting_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,13 +199,15 @@ def _vote(self, forecasts: List[TSDataset]) -> TSDataset:
forecast_dataset = TSDataset(df=forecast_df, freq=forecasts[0].freq)
return forecast_dataset

def _forecast(self) -> TSDataset:
def _forecast(self, return_components: bool) -> TSDataset:
"""Make predictions.

Compute weighted average of pipelines' forecasts
"""
if self.ts is None:
raise ValueError("Something went wrong, ts is None!")
if return_components:
raise NotImplementedError("Adding target components is not currently implemented!")

forecasts = Parallel(n_jobs=self.n_jobs, backend="multiprocessing", verbose=11)(
delayed(self._forecast_pipeline)(pipeline=pipeline) for pipeline in self.pipelines
Expand All @@ -220,9 +222,12 @@ def _predict(
end_timestamp: pd.Timestamp,
prediction_interval: bool,
quantiles: Sequence[float],
return_components: bool,
) -> TSDataset:
if prediction_interval:
raise NotImplementedError(f"Ensemble {self.__class__.__name__} doesn't support prediction intervals!")
if return_components:
raise NotImplementedError("Adding target components is not currently implemented!")

self.ts = cast(TSDataset, self.ts)
predictions = Parallel(n_jobs=self.n_jobs, backend="multiprocessing", verbose=11)(
Expand Down
24 changes: 23 additions & 1 deletion etna/pipeline/autoregressive_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,12 @@ def _create_predictions_template(self) -> pd.DataFrame:
prediction_df.index.name = "timestamp"
return prediction_df

def _forecast(self) -> TSDataset:
def _forecast(self, return_components: bool) -> TSDataset:
"""Make predictions."""
if self.ts is None:
raise ValueError("Something went wrong, ts is None!")
if return_components:
raise NotImplementedError("Adding target components is not currently implemented!")
prediction_df = self._create_predictions_template()

for idx_start in range(0, self.horizon, self.step):
Expand Down Expand Up @@ -158,3 +160,23 @@ def _forecast(self) -> TSDataset:
prediction_ts.df = prediction_ts.df.tail(self.horizon)
prediction_ts.raw_df = prediction_ts.raw_df.tail(self.horizon)
return prediction_ts

def _predict(
    self,
    ts: TSDataset,
    start_timestamp: pd.Timestamp,
    end_timestamp: pd.Timestamp,
    prediction_interval: bool,
    quantiles: Sequence[float],
    return_components: bool = False,
) -> TSDataset:
    """Make in-sample predictions by delegating to the parent implementation.

    Target components are not supported here: a truthy ``return_components``
    raises ``NotImplementedError`` before anything is delegated.
    """
    if not return_components:
        # Nothing specific to do in this override; forward every argument unchanged.
        return super()._predict(
            ts=ts,
            start_timestamp=start_timestamp,
            end_timestamp=end_timestamp,
            prediction_interval=prediction_interval,
            quantiles=quantiles,
            return_components=return_components,
        )
    raise NotImplementedError("Adding target components is not currently implemented!")
38 changes: 33 additions & 5 deletions etna/pipeline/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,11 @@ def fit(self, ts: TSDataset) -> "AbstractPipeline":

@abstractmethod
def forecast(
self, prediction_interval: bool = False, quantiles: Sequence[float] = (0.025, 0.975), n_folds: int = 3
self,
prediction_interval: bool = False,
quantiles: Sequence[float] = (0.025, 0.975),
n_folds: int = 3,
return_components: bool = False,
) -> TSDataset:
"""Make predictions.

Expand All @@ -142,6 +146,8 @@ def forecast(
Levels of prediction distribution. By default 2.5% and 97.5% taken to form a 95% prediction interval
n_folds:
Number of folds to use in the backtest for prediction interval estimation
return_components:
If True additionally returns forecast components

Returns
-------
Expand All @@ -158,6 +164,7 @@ def predict(
end_timestamp: Optional[pd.Timestamp] = None,
prediction_interval: bool = False,
quantiles: Sequence[float] = (0.025, 0.975),
return_components: bool = False,
) -> TSDataset:
"""Make in-sample predictions on dataset in a given range.

Expand All @@ -179,6 +186,8 @@ def predict(
If True returns prediction interval for forecast.
quantiles:
Levels of prediction distribution. By default 2.5% and 97.5% taken to form a 95% prediction interval.
return_components:
If True additionally returns forecast components.

Returns
-------
Expand Down Expand Up @@ -258,7 +267,7 @@ def _validate_quantiles(quantiles: Sequence[float]) -> Sequence[float]:
return quantiles

@abstractmethod
def _forecast(self) -> TSDataset:
def _forecast(self, return_components: bool) -> TSDataset:
"""Make predictions."""
pass

Expand Down Expand Up @@ -299,7 +308,11 @@ def _add_forecast_borders(
predictions.df = pd.concat([predictions.df] + borders, axis=1).sort_index(axis=1, level=(0, 1))

def forecast(
self, prediction_interval: bool = False, quantiles: Sequence[float] = (0.025, 0.975), n_folds: int = 3
self,
prediction_interval: bool = False,
quantiles: Sequence[float] = (0.025, 0.975),
n_folds: int = 3,
return_components: bool = False,
) -> TSDataset:
"""Make predictions.

Expand All @@ -311,11 +324,18 @@ def forecast(
Levels of prediction distribution. By default 2.5% and 97.5% taken to form a 95% prediction interval
n_folds:
Number of folds to use in the backtest for prediction interval estimation
return_components:
If True additionally returns forecast components

Returns
-------
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
:
Dataset with predictions

Raises
------
NotImplementedError:
Adding target components is not currently implemented
"""
if self.ts is None:
raise ValueError(
Expand All @@ -325,7 +345,7 @@ def forecast(
self._validate_quantiles(quantiles=quantiles)
self._validate_backtest_n_folds(n_folds=n_folds)

predictions = self._forecast()
predictions = self._forecast(return_components=return_components)
if prediction_interval:
predictions = self._forecast_prediction_interval(
predictions=predictions, quantiles=quantiles, n_folds=n_folds
Expand Down Expand Up @@ -356,15 +376,17 @@ def _make_predict_timestamps(

return start_timestamp, end_timestamp

# Hook that produces predictions for ``ts`` between ``start_timestamp`` and ``end_timestamp``;
# it is marked abstract here.
# NOTE(review): presumably the public ``predict`` forwards its arguments to this hook — confirm against the full file.
@abstractmethod
def _predict(
self,
ts: TSDataset,
start_timestamp: Optional[pd.Timestamp],
end_timestamp: Optional[pd.Timestamp],
prediction_interval: bool,
quantiles: Sequence[float],
return_components: bool,
) -> TSDataset:
# NOTE(review): this diff view shows the ``raise`` and the ``pass`` back to back; presumably one is the
# removed line and the other its replacement — confirm against the merged file before relying on either.
raise NotImplementedError("Predict method isn't implemented!")
pass

def predict(
self,
Expand All @@ -373,6 +395,7 @@ def predict(
end_timestamp: Optional[pd.Timestamp] = None,
prediction_interval: bool = False,
quantiles: Sequence[float] = (0.025, 0.975),
return_components: bool = False,
) -> TSDataset:
"""Make in-sample predictions on dataset in a given range.

Expand All @@ -394,6 +417,8 @@ def predict(
If True returns prediction interval for forecast.
quantiles:
Levels of prediction distribution. By default 2.5% and 97.5% taken to form a 95% prediction interval.
return_components:
If True additionally returns forecast components.

Returns
-------
Expand All @@ -408,6 +433,8 @@ def predict(
Value of ``start_timestamp`` goes before point where each segment started.
ValueError:
Value of ``end_timestamp`` goes after the last timestamp.
NotImplementedError:
Adding target components is not currently implemented
"""
start_timestamp, end_timestamp = self._make_predict_timestamps(
ts=ts, start_timestamp=start_timestamp, end_timestamp=end_timestamp
Expand All @@ -419,6 +446,7 @@ def predict(
end_timestamp=end_timestamp,
prediction_interval=prediction_interval,
quantiles=quantiles,
return_components=return_components,
alex-hse-repository marked this conversation as resolved.
Show resolved Hide resolved
)
return result

Expand Down
Loading