-
Notifications
You must be signed in to change notification settings - Fork 82
Teach BATS
/TBATS
to work with in-sample, out-sample predictions correctly
#806
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
6c27d13
Add basic implementation
1511b84
Remove warning about frequency
9ff2d78
Fix positions of some cases
bde7ca6
Update changelog
d0e8807
Fix test_prediction_interval
f8582b8
Update error message
7bbe554
Add test on mixed in-sample out-sample prediction
7e31a99
Add separate implementation of determine_num_steps_to_forecast and te…
fa90f48
Remove changing SARIMAX
10cc33e
Merge branch 'master' into issue-800
martins0n File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import pandas as pd | ||
|
||
|
||
def determine_num_steps_to_forecast(
    last_train_timestamp: pd.Timestamp, last_test_timestamp: pd.Timestamp, freq: str
) -> int:
    """Determine number of steps to make a forecast in future.

    It is useful for out-sample forecast with gap if model predicts only on a certain number of steps
    in autoregressive manner.

    Parameters
    ----------
    last_train_timestamp:
        last timestamp in train data
    last_test_timestamp:
        last timestamp in test data, should be after ``last_train_timestamp``
    freq:
        pandas frequency string: `Offset aliases <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`_;
        the value is forwarded to ``pd.date_range`` unchanged

    Returns
    -------
    :
        number of steps

    Raises
    ------
    ValueError:
        Value of last test timestamp is less or equal than last train timestamp
    ValueError:
        Last train timestamp isn't correct according to a given frequency
    ValueError:
        Last test timestamps isn't reachable with a given frequency
    """
    if last_test_timestamp <= last_train_timestamp:
        raise ValueError("Last train timestamp should be less than last test timestamp!")

    # check if last_train_timestamp is normalized: `pd.date_range` rolls a start that
    # doesn't lie on the frequency grid forward, so the first generated value differs from it;
    # compare scalars instead of relying on the truth value of a one-element array
    normalized_last_train_timestamp = pd.date_range(start=last_train_timestamp, periods=1, freq=freq)[0]
    if normalized_last_train_timestamp != last_train_timestamp:
        raise ValueError(f"Last train timestamp isn't correct according to given frequency: {freq}")

    # make linear probing, because for complex offsets there is a cycle in `pd.date_range`;
    # advance one period at a time from the current timestamp, so the total work stays linear
    # in the number of steps instead of rebuilding an ever-growing range on each iteration
    num_steps = 0
    cur_timestamp = last_train_timestamp
    while cur_timestamp < last_test_timestamp:
        cur_timestamp = pd.date_range(start=cur_timestamp, periods=2, freq=freq)[-1]
        num_steps += 1

    # the probing jumped over the target, so it doesn't lie on the frequency grid
    if cur_timestamp != last_test_timestamp:
        raise ValueError(f"Last test timestamps isn't reachable with freq: {freq}")
    return num_steps
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import pandas as pd | ||
import pytest | ||
|
||
from etna.models.utils import determine_num_steps_to_forecast | ||
|
||
|
||
@pytest.mark.parametrize(
    "last_train_timestamp, last_test_timestamp, freq, answer",
    [
        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-02"), "D", 1),
        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-11"), "D", 10),
        (pd.Timestamp("2020-01-05"), pd.Timestamp("2020-01-19"), "W-SUN", 2),
        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-15"), pd.offsets.Week(), 2),
        (pd.Timestamp("2020-01-31"), pd.Timestamp("2021-02-28"), "M", 13),
        (pd.Timestamp("2020-01-01"), pd.Timestamp("2021-06-01"), "MS", 17),
    ],
)
def test_determine_num_steps_to_forecast_ok(last_train_timestamp, last_test_timestamp, freq, answer):
    """Check that reachable train/test timestamp pairs give the expected number of steps."""
    call_kwargs = {
        "last_train_timestamp": last_train_timestamp,
        "last_test_timestamp": last_test_timestamp,
        "freq": freq,
    }
    assert determine_num_steps_to_forecast(**call_kwargs) == answer
|
||
|
||
@pytest.mark.parametrize(
    "last_train_timestamp, last_test_timestamp, freq",
    [
        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-01-01"), "D"),
        (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-01-01"), "D"),
    ],
)
def test_determine_num_steps_to_forecast_fail_wrong_order(last_train_timestamp, last_test_timestamp, freq):
    """Check that a test timestamp equal to or before the train timestamp is rejected."""
    call_kwargs = {
        "last_train_timestamp": last_train_timestamp,
        "last_test_timestamp": last_test_timestamp,
        "freq": freq,
    }
    expected_message = "Last train timestamp should be less than last test timestamp"
    with pytest.raises(ValueError, match=expected_message):
        determine_num_steps_to_forecast(**call_kwargs)
|
||
|
||
@pytest.mark.parametrize(
    "last_train_timestamp, last_test_timestamp, freq",
    [
        (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-06-01"), "M"),
        (pd.Timestamp("2020-01-02"), pd.Timestamp("2020-06-01"), "MS"),
    ],
)
def test_determine_num_steps_to_forecast_fail_wrong_start(last_train_timestamp, last_test_timestamp, freq):
    """Check that a train timestamp lying off the frequency grid is rejected."""
    call_kwargs = {
        "last_train_timestamp": last_train_timestamp,
        "last_test_timestamp": last_test_timestamp,
        "freq": freq,
    }
    expected_message = "Last train timestamp isn't correct according to given frequency"
    with pytest.raises(ValueError, match=expected_message):
        determine_num_steps_to_forecast(**call_kwargs)
|
||
|
||
@pytest.mark.parametrize(
    "last_train_timestamp, last_test_timestamp, freq",
    [
        (pd.Timestamp("2020-01-31"), pd.Timestamp("2020-06-05"), "M"),
        (pd.Timestamp("2020-01-01"), pd.Timestamp("2020-06-05"), "MS"),
    ],
)
def test_determine_num_steps_to_forecast_fail_wrong_end(last_train_timestamp, last_test_timestamp, freq):
    """Check that a test timestamp lying off the frequency grid is rejected."""
    call_kwargs = {
        "last_train_timestamp": last_train_timestamp,
        "last_test_timestamp": last_test_timestamp,
        "freq": freq,
    }
    expected_message = "Last test timestamps isn't reachable with freq"
    with pytest.raises(ValueError, match=expected_message):
        determine_num_steps_to_forecast(**call_kwargs)
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's rewrite the message to something like:
in-sample predictions are not supported by current implementation