Skip to content

Commit e6f2208

Browse files
authored
Fix/operand error with encoders (#2034)
* fix: create a temporary Datetime index when series frequency represents an ambiguous timedelta value to extract the start time index * feat: updated changelog * fix: fixed corner case, generate the shortest temporary datetimeindex possible * feat: added tests to cover the cases where the series freq cannot be converted to Timedelta
1 parent f3bdbcf commit e6f2208

File tree

3 files changed

+40
-13
lines changed

3 files changed

+40
-13
lines changed

CHANGELOG.md

+6-1
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,15 @@ but cannot always guarantee backwards compatibility. Changes that may **break co
99
[Full Changelog](https://github.com/unit8co/darts/compare/0.26.0...master)
1010

1111
### For users of the library:
12+
**Improved**
1213
- Improvements to `TorchForecastingModel`:
1314
- Added callback `darts.utils.callbacks.TFMProgressBar` to customize at which model stages to display the progress bar. [#2020](https://github.com/unit8co/darts/pull/2020) by [Dennis Bader](https://github.com/dennisbader).
1415
- Improvements to documentation:
15-
- Adapted the example notebooks to properly apply data transformers and avoid look-ahead bias. [#2020](https://github.com/unit8co/darts/pull/2020) by [Samriddhi Singh](https://github.com/SimTheGreat).
16+
- Adapted the example notebooks to properly apply data transformers and avoid look-ahead bias. [#2020](https://github.com/unit8co/darts/pull/2020) by [Samriddhi Singh](https://github.com/SimTheGreat).
17+
18+
**Fixed**
19+
- Fixed a bug when trying to divide `pd.Timedelta` by a `pd.Offset` with an ambiguous conversion to `pd.Timedelta` when using encoders. [#2034](https://github.com/unit8co/darts/pull/2034) by [Antoine Madrona](https://github.com/madtoinou).
20+
1621
### For developers of the library:
1722

1823
## [0.26.0](https://github.com/unit8co/darts/tree/0.26.0) (2023-09-16)

darts/tests/utils/tabularization/test_create_lagged_training_data.py

+13-6
Original file line numberDiff line numberDiff line change
@@ -1132,37 +1132,44 @@ def test_lagged_training_data_extend_past_and_future_covariates_range_idx(self):
11321132
assert np.allclose(expected_X, X[:, :, 0])
11331133
assert np.allclose(expected_y, y[:, :, 0])
11341134

1135-
def test_lagged_training_data_extend_past_and_future_covariates_datetime_idx(self):
1135+
@pytest.mark.parametrize("freq", ["D", "MS", "Y"])
1136+
def test_lagged_training_data_extend_past_and_future_covariates_datetime_idx(
1137+
self, freq
1138+
):
11361139
"""
11371140
Tests that `create_lagged_training_data` correctly handles case where features
11381141
and labels can be created for a time that is *not* contained in `past_covariates`
11391142
and/or `future_covariates`. This particular test checks this behaviour by using
1140-
datetime index timeseries.
1143+
datetime index timeseries and three different frequencies: daily, month start and
1144+
year end.
11411145
11421146
More specifically, we define the series and lags such that a training example can
11431147
be generated for time `target.end_time()`, even though this time is contained in
11441148
neither `past` nor `future`.
11451149
"""
1146-
# Can create feature for time `t = '1/11/2000'`, but this time isn't in `past` or `future`:
1150+
# Can create feature for time `t = '1/1/2000'+10*freq`, but this time isn't in `past` or `future`:
11471151
target = linear_timeseries(
11481152
start=pd.Timestamp("1/1/2000"),
1149-
end=pd.Timestamp("1/11/2000"),
11501153
start_value=1,
11511154
end_value=2,
1155+
length=11,
1156+
freq=freq,
11521157
)
11531158
lags = [-1]
11541159
past = linear_timeseries(
11551160
start=pd.Timestamp("1/1/2000"),
1156-
end=pd.Timestamp("1/9/2000"),
11571161
start_value=2,
11581162
end_value=3,
1163+
length=9,
1164+
freq=freq,
11591165
)
11601166
lags_past = [-2]
11611167
future = linear_timeseries(
11621168
start=pd.Timestamp("1/1/2000"),
1163-
end=pd.Timestamp("1/7/2000"),
11641169
start_value=3,
11651170
end_value=4,
1171+
length=7,
1172+
freq=freq,
11661173
)
11671174
lags_future = [-4]
11681175
# Only want to check very last generated observation:

darts/utils/data/tabularization.py

+21-6
Original file line numberDiff line numberDiff line change
@@ -883,12 +883,27 @@ def _create_lagged_data_by_moving_window(
883883
# for all feature times - these values will become labels.
884884
# If `start_time` not included in `time_index_i`, can 'manually' calculate
885885
# what its index *would* be if `time_index_i` were extended to include that time:
886-
if not is_target_series and (time_index_i[-1] <= start_time):
887-
start_time_idx = (
888-
len(time_index_i)
889-
- 1
890-
+ (start_time - time_index_i[-1]) // series_i.freq
891-
)
886+
if not is_target_series and (time_index_i[-1] < start_time):
887+
# Series frequency represents a non-ambiguous timedelta value (not ‘M’, ‘Y’ or ‘y’)
888+
if pd.to_timedelta(series_i.freq, errors="coerce") is not pd.NaT:
889+
start_time_idx = (
890+
len(time_index_i)
891+
- 1
892+
+ (start_time - time_index_i[-1]) // series_i.freq
893+
)
894+
else:
895+
# Create a temporary DatetimeIndex to extract the actual start index.
896+
start_time_idx = (
897+
len(time_index_i)
898+
- 1
899+
+ len(
900+
pd.date_range(
901+
start=time_index_i[-1] + series_i.freq,
902+
end=start_time,
903+
freq=series_i.freq,
904+
)
905+
)
906+
)
892907
elif not is_target_series and (time_index_i[0] >= start_time):
893908
start_time_idx = max_lag_i
894909
# If `start_time` *is* included in `time_index_i`, need to binary search `time_index_i`

0 commit comments

Comments
 (0)