-
Notifications
You must be signed in to change notification settings - Fork 82
Fix performance of DeepARModel
and TFTModel
#1322
Merged
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Script for old version (1.15.1):

import time
import random
import torch
import pandas as pd
import numpy as np
from loguru import logger
from etna.datasets.tsdataset import TSDataset
from etna.datasets import generate_ar_df
from etna.pipeline import Pipeline
from etna.metrics import SMAPE, MAPE, MAE
from etna.transforms import DateFlagsTransform
from etna.transforms import PytorchForecastingTransform
from etna.models.nn import TFTModel
# Forecast horizon in time steps: passed to the pipeline as `horizon` and
# to the dataset parameters as `max_prediction_length`.
HORIZON = 7
def generate_tsdataset(dataset_config: dict) -> TSDataset:
    """Build a synthetic daily dataset from a configuration mapping.

    Parameters
    ----------
    dataset_config:
        Mapping with the keys ``"periods"`` (length of the target series),
        ``"n_segments"`` (number of segments), ``"exogs"`` (truthy to add an
        ``exog`` column), ``"regressors"`` (truthy to add a ``regressor``
        column) and ``"horizon"`` (extra periods generated for the
        additional columns beyond the target).

    Returns
    -------
    :
        Dataset with frequency ``"D"`` starting at 2021-06-01. When
        ``regressors`` is truthy, ``regressor`` is declared as known in the
        future; when neither flag is set, no exogenous data is attached.
    """
    periods = dataset_config["periods"]
    n_segments = dataset_config["n_segments"]
    regressors = dataset_config["regressors"]
    exogs = dataset_config["exogs"]
    horizon = dataset_config["horizon"]

    # Arguments shared by every generated frame.
    common = {"start_time": "2021-06-01", "n_segments": n_segments, "freq": "D"}

    df = generate_ar_df(periods=periods, **common)

    df_exog = None
    if exogs:
        df_exog = generate_ar_df(periods=periods + horizon, **common)
        df_exog = df_exog.rename(columns={"target": "exog"})
    if regressors:
        df_regressors = generate_ar_df(periods=periods + horizon, **common)
        df_regressors = df_regressors.rename(columns={"target": "regressor"})
        if df_exog is None:
            # Without an exog frame, pd.concat would drop the None and keep
            # only the "regressor" column, losing timestamp/segment. Use the
            # full regressor frame instead.
            df_exog = df_regressors
        else:
            df_exog = pd.concat((df_exog, df_regressors[["regressor"]]), axis=1)

    # Convert to wide format only when there is something to convert.
    if df_exog is not None:
        df_exog = TSDataset.to_dataset(df_exog)

    df = TSDataset.to_dataset(df)
    ts = TSDataset(
        df=df,
        freq="D",
        df_exog=df_exog,
        known_future=["regressor"] if regressors else (),
    )
    return ts
def set_seed(seed: int = 42):
    """Seed the ``random``, NumPy and PyTorch (CPU and CUDA) generators."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def main():
    """Run a 3-fold TFT backtest on synthetic data; log run time and mean MAE."""
    set_seed()

    # Alternative input: load examples/data/example_dataset.csv instead of
    # generating synthetic data.
    # original_df = pd.read_csv("examples/data/example_dataset.csv")
    # df = TSDataset.to_dataset(original_df)
    # ts = TSDataset(df, freq="D")
    ts = generate_tsdataset(
        {
            "n_segments": 100,
            "periods": 500,
            "exogs": True,
            "regressors": True,
            "horizon": 14,
        }
    )

    # Seed again after data generation, before transforms and model are built.
    set_seed()

    date_flags = DateFlagsTransform(
        day_number_in_week=True,
        day_number_in_month=False,
        out_column="regressor_dateflag",
    )
    pf_transform = PytorchForecastingTransform(
        max_encoder_length=21,
        min_encoder_length=21,
        max_prediction_length=HORIZON,
        time_varying_known_reals=["time_idx"],
        time_varying_known_categoricals=["regressor_dateflag_day_number_in_week"],
        time_varying_unknown_reals=["target"],
        static_categoricals=["segment"],
        target_normalizer=None,
    )
    pipeline_tft = Pipeline(
        model=TFTModel(trainer_kwargs={"max_epochs": 1}),
        transforms=[date_flags, pf_transform],
        horizon=HORIZON,
    )

    # Only the backtest call itself is timed.
    started = time.perf_counter()
    metrics_tft, _, _ = pipeline_tft.backtest(
        ts,
        metrics=[SMAPE(), MAPE(), MAE()],
        n_folds=3,
        n_jobs=1,
    )
    run_time = time.perf_counter() - started

    logger.info(f"Run time: {run_time:.3f}")
    logger.info(f"Metrics: {metrics_tft['MAE'].mean():.3f}")
if __name__ == "__main__":
    main()

Results:
|
Script for new version:

import time
import random
import torch
import pandas as pd
import numpy as np
from loguru import logger
from etna.datasets.tsdataset import TSDataset
from etna.datasets import generate_ar_df
from etna.pipeline import Pipeline
from etna.metrics import SMAPE, MAPE, MAE
from etna.transforms import DateFlagsTransform
from etna.models.nn.utils import PytorchForecastingDatasetBuilder
from etna.models.nn import TFTModel
# Forecast horizon in time steps: passed to the pipeline as `horizon` and
# to the dataset builder as `max_prediction_length`.
HORIZON = 7
def generate_tsdataset(dataset_config: dict) -> TSDataset:
    """Build a synthetic daily dataset from a configuration mapping.

    Parameters
    ----------
    dataset_config:
        Mapping with the keys ``"periods"`` (length of the target series),
        ``"n_segments"`` (number of segments), ``"exogs"`` (truthy to add an
        ``exog`` column), ``"regressors"`` (truthy to add a ``regressor``
        column) and ``"horizon"`` (extra periods generated for the
        additional columns beyond the target).

    Returns
    -------
    :
        Dataset with frequency ``"D"`` starting at 2021-06-01. When
        ``regressors`` is truthy, ``regressor`` is declared as known in the
        future; when neither flag is set, no exogenous data is attached.
    """
    periods = dataset_config["periods"]
    n_segments = dataset_config["n_segments"]
    regressors = dataset_config["regressors"]
    exogs = dataset_config["exogs"]
    horizon = dataset_config["horizon"]

    # Arguments shared by every generated frame.
    common = {"start_time": "2021-06-01", "n_segments": n_segments, "freq": "D"}

    df = generate_ar_df(periods=periods, **common)

    df_exog = None
    if exogs:
        df_exog = generate_ar_df(periods=periods + horizon, **common)
        df_exog = df_exog.rename(columns={"target": "exog"})
    if regressors:
        df_regressors = generate_ar_df(periods=periods + horizon, **common)
        df_regressors = df_regressors.rename(columns={"target": "regressor"})
        if df_exog is None:
            # Without an exog frame, pd.concat would drop the None and keep
            # only the "regressor" column, losing timestamp/segment. Use the
            # full regressor frame instead.
            df_exog = df_regressors
        else:
            df_exog = pd.concat((df_exog, df_regressors[["regressor"]]), axis=1)

    # Convert to wide format only when there is something to convert.
    if df_exog is not None:
        df_exog = TSDataset.to_dataset(df_exog)

    df = TSDataset.to_dataset(df)
    ts = TSDataset(
        df=df,
        freq="D",
        df_exog=df_exog,
        known_future=["regressor"] if regressors else (),
    )
    return ts
def set_seed(seed: int = 42):
    """Seed the ``random``, NumPy and PyTorch (CPU and CUDA) generators."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
def main():
    """Run a 3-fold TFT backtest on synthetic data; log run time and mean MAE."""
    set_seed()

    # Alternative input: load examples/data/example_dataset.csv instead of
    # generating synthetic data.
    # original_df = pd.read_csv("examples/data/example_dataset.csv")
    # df = TSDataset.to_dataset(original_df)
    # ts = TSDataset(df, freq="D")
    ts = generate_tsdataset(
        {
            "n_segments": 100,
            "periods": 500,
            "exogs": True,
            "regressors": True,
            "horizon": 14,
        }
    )

    # Seed again after data generation, before transforms and model are built.
    set_seed()

    date_flags = DateFlagsTransform(
        day_number_in_week=True,
        day_number_in_month=False,
        out_column="regressor_dateflag",
    )
    # The dataset parameters are handed to the model through a builder object.
    builder = PytorchForecastingDatasetBuilder(
        max_encoder_length=21,
        min_encoder_length=21,
        max_prediction_length=HORIZON,
        time_varying_known_reals=["time_idx"],
        time_varying_known_categoricals=["regressor_dateflag_day_number_in_week"],
        time_varying_unknown_reals=["target"],
        static_categoricals=["segment"],
        target_normalizer=None,
    )
    pipeline_tft = Pipeline(
        model=TFTModel(dataset_builder=builder, trainer_params={"max_epochs": 1}),
        transforms=[date_flags],
        horizon=HORIZON,
    )

    # Only the backtest call itself is timed.
    started = time.perf_counter()
    metrics_tft, _, _ = pipeline_tft.backtest(
        ts,
        metrics=[SMAPE(), MAPE(), MAE()],
        n_folds=3,
        n_jobs=1,
    )
    run_time = time.perf_counter() - started

    logger.info(f"Run time: {run_time:.3f}")
    logger.info(f"Metrics: {metrics_tft['MAE'].mean():.3f}")
if __name__ == "__main__":
    main()

Results:
|
Codecov Report
❗ Your organization is not using the GitHub App Integration. As a result you may experience degraded service beginning May 15th. Please install the Github App Integration for your organization. Read more. @@ Coverage Diff @@
## master #1322 +/- ##
==========================================
+ Coverage 88.95% 89.09% +0.14%
==========================================
Files 193 204 +11
Lines 12319 12638 +319
==========================================
+ Hits 10958 11260 +302
- Misses 1361 1378 +17
... and 11 files with indirect coverage changes 📣 We’re building smart automated test selection to slash your CI/CD build times. Learn more |
🚀 Deployed on https://deploy-preview-1322--etna-docs.netlify.app |
alex-hse-repository
approved these changes
Jul 24, 2023
1 task
Sign up for free
to subscribe to this conversation on GitHub.
Already have an account?
Sign in.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Before submitting (must do checklist)
Proposed Changes
Optimize creation of
time_idx
feature.

Closing issues