Skip to content

Fix performance of DeepARModel and TFTModel #1322

Merged
3 commits merged on
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
-
-
-
- Fix performance of `DeepARModel` and `TFTModel` ([#1322](https://github.com/tinkoff-ai/etna/pull/1322))
- Fix `mrmr` feature selection when working with categorical features ([#1311](https://github.com/tinkoff-ai/etna/pull/1311))
- Pin version of `statsforecast` to 1.4 to avoid dependency conflicts during installation ([#1313](https://github.com/tinkoff-ai/etna/pull/1313))
- Add inverse transformation into `predict` method of pipelines ([#1314](https://github.com/tinkoff-ai/etna/pull/1314))
Expand Down
18 changes: 11 additions & 7 deletions etna/models/nn/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,17 +125,16 @@ def create_train_dataset(self, ts: TSDataset) -> TimeSeriesDataSet:
"""
df_flat = ts.to_pandas(flatten=True)
df_flat = df_flat.dropna()
self.min_timestamp = df_flat.timestamp.min()

mapping_time_idx = {x: i for i, x in enumerate(ts.index)}
df_flat["time_idx"] = df_flat["timestamp"].map(mapping_time_idx)

self.min_timestamp = df_flat["timestamp"].min()

if self.time_varying_known_categoricals:
for feature_name in self.time_varying_known_categoricals:
df_flat[feature_name] = df_flat[feature_name].astype(str)

# making time_idx feature.
# it's needed for pytorch-forecasting for proper train-test split.
# it should be incremented by 1 for every new timestamp.
df_flat["time_idx"] = df_flat["timestamp"].apply(lambda x: determine_num_steps(self.min_timestamp, x, ts.freq))

pf_dataset = TimeSeriesDataSet(
df_flat,
time_idx="time_idx",
Expand Down Expand Up @@ -192,7 +191,12 @@ def create_inference_dataset(self, ts: TSDataset, horizon: int) -> TimeSeriesDat
df_flat = df_flat[df_flat.timestamp >= self.min_timestamp]
df_flat["target"] = df_flat["target"].fillna(0)

df_flat["time_idx"] = df_flat["timestamp"].apply(lambda x: determine_num_steps(self.min_timestamp, x, ts.freq))
inference_min_timestamp = df_flat["timestamp"].min()
time_idx_shift = determine_num_steps(
start_timestamp=self.min_timestamp, end_timestamp=inference_min_timestamp, freq=ts.freq
)
mapping_time_idx = {x: i + time_idx_shift for i, x in enumerate(ts.index)}
df_flat["time_idx"] = df_flat["timestamp"].map(mapping_time_idx)

if self.time_varying_known_categoricals:
for feature_name in self.time_varying_known_categoricals:
Expand Down