From 9dbe81fe3a35589d3a707db177d4ce5122b0811f Mon Sep 17 00:00:00 2001 From: madtoinou Date: Thu, 29 Jun 2023 09:05:07 +0200 Subject: [PATCH 01/21] feat: historical_foreacst accept negative integer as start value --- .../forecasting/test_historical_forecasts.py | 21 +++++++++++++++---- darts/tests/test_timeseries.py | 21 ++++++++++++++++++- darts/timeseries.py | 6 ++++-- darts/utils/utils.py | 21 ++++++++++++------- 4 files changed, 55 insertions(+), 14 deletions(-) diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index c5d0219932..40ced94671 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -352,6 +352,15 @@ def test_historical_forecasts_local_models(self): "LocalForecastingModel does not support historical forecasting with `retrain` set to `False`" ) + def test_historical_forecasts_negative_start(self): + series = tg.sine_timeseries(length=10) + + model = LinearRegressionModel(lags=2) + model.fit(series[:8]) + + forecasts = model.historical_forecasts(series=series, start=-2, retrain=False) + self.assertEqual(len(forecasts), 2) + def test_historical_forecasts(self): train_length = 10 forecast_horizon = 8 @@ -550,14 +559,18 @@ def test_sanity_check_invalid_start(self): ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step1, start=rangeidx_step1.start_time() - rangeidx_step1.freq + rangeidx_step1, start=-11 ) - assert str(msg.value).startswith("if `start` is an integer, must be `>= 0`") + assert str(msg.value).startswith( + "`start` index `-11` is out of bounds for series of length 10" + ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step2, start=rangeidx_step2.start_time() - rangeidx_step2.freq + rangeidx_step2, start=-11 ) - assert str(msg.value).startswith("if `start` is an 
integer, must be `>= 0`") + assert str(msg.value).startswith( + "`start` index `-11` is out of bounds for series of length 10" + ) # value too high with pytest.raises(ValueError) as msg: diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py index 55319b261e..546466a7da 100644 --- a/darts/tests/test_timeseries.py +++ b/darts/tests/test_timeseries.py @@ -113,6 +113,11 @@ def test_integer_range_indexing(self): # getting index for idx should return i s.t., series[i].time == idx self.assertEqual(series.get_index_at_point(101), 91) + # getting index for negative idx return idx + len(ts) + self.assertEqual(series.get_index_at_point(-3), 97) + # getting index for negative idx greater than the ts length + with self.assertRaises(ValueError): + series.get_index_at_point(-(len(series) + 1)) # slicing outside of the index range should return an empty ts self.assertEqual(len(series[120:125]), 0) @@ -130,6 +135,8 @@ def test_integer_range_indexing(self): # getting index for idx should return i s.t., series[i].time == idx self.assertEqual(series.get_index_at_point(100), 50) + # getting index for negative idx return idx + len(ts) + self.assertEqual(series.get_index_at_point(-1), 99) # getting index outside of the index range should raise an exception with self.assertRaises(IndexError): @@ -158,6 +165,8 @@ def test_integer_range_indexing(self): # getting index for idx should return i s.t., series[i].time == idx self.assertEqual(series.get_index_at_point(16), 3) + # getting index for negative idx return idx + len(ts) + self.assertEqual(series.get_index_at_point(-2), 8) def test_integer_indexing(self): n = 10 @@ -493,15 +502,25 @@ def helper_test_split(test_case, test_series: TimeSeries): test_case.assertEqual(len(seriesK), 5) test_case.assertEqual(len(seriesL), len(test_series) - 5) + seriesM, seriesN = test_series.split_after(-2) + test_case.assertEqual(len(seriesM), len(test_series) - len(seriesN)) + test_case.assertEqual(len(seriesN), 1) + + seriesO, seriesP 
= test_series.split_before(-2) + test_case.assertEqual(len(seriesO), len(test_series) - len(seriesP)) + test_case.assertEqual(len(seriesP), 2) + test_case.assertEqual(test_series.freq_str, seriesA.freq_str) test_case.assertEqual(test_series.freq_str, seriesC.freq_str) test_case.assertEqual(test_series.freq_str, seriesE.freq_str) test_case.assertEqual(test_series.freq_str, seriesG.freq_str) test_case.assertEqual(test_series.freq_str, seriesI.freq_str) test_case.assertEqual(test_series.freq_str, seriesK.freq_str) + test_case.assertEqual(test_series.freq_str, seriesM.freq_str) + test_case.assertEqual(test_series.freq_str, seriesO.freq_str) # Test split points outside of range - for value in [-5, 1.1, pd.Timestamp("21300104")]: + for value in [1.1, pd.Timestamp("21300104")]: with test_case.assertRaises(ValueError): test_series.split_before(value) diff --git a/darts/timeseries.py b/darts/timeseries.py index 2bfe9808b6..2f2ae0698f 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -215,7 +215,7 @@ def __init__(self, xa: xr.DataArray): logger, ) else: - self._freq = self._time_index.step + self._freq: int = self._time_index.step self._freq_str = None # check static covariates @@ -2085,7 +2085,9 @@ def get_index_at_point( ) point_index = int((len(self) - 1) * point) elif isinstance(point, (int, np.int64)): - if self.has_datetime_index or (self.start_time() == 0 and self.freq == 1): + if point < 0: + point_index = point + len(self) + elif self.has_datetime_index or (self.start_time() == 0 and self.freq == 1): point_index = point else: point_index_float = (point - self.start_time()) / self.freq diff --git a/darts/utils/utils.py b/darts/utils/utils.py index fb60f1c6da..0b2bf605d4 100644 --- a/darts/utils/utils.py +++ b/darts/utils/utils.py @@ -230,9 +230,7 @@ def _historical_forecasts_general_checks(series, kwargs): 0.0 <= n.start <= 1.0, "`start` should be between 0.0 and 1.0.", logger ) elif isinstance(n.start, (int, np.int64)): - raise_if_not( - n.start >= 0, 
"if `start` is an integer, must be `>= 0`.", logger - ) + pass # verbose error messages if not isinstance(n.start, pd.Timestamp): @@ -259,10 +257,17 @@ def _historical_forecasts_general_checks(series, kwargs): logger, ) elif isinstance(n.start, (int, np.int64)): - if ( + raise_out_of_bound_exception = False + # negative index is index type and frequency independent + if n.start < 0 and np.abs(n.start) > len(series_): + raise_out_of_bound_exception = True + elif ( series_.has_datetime_index or (series_.has_range_index and series_.freq == 1) ) and n.start >= len(series_): + raise_out_of_bound_exception = True + + if raise_out_of_bound_exception: raise_log( ValueError( f"`start` index `{n.start}` is out of bounds for series of length {len(series_)} " @@ -270,9 +275,11 @@ def _historical_forecasts_general_checks(series, kwargs): ), logger, ) - elif ( - series_.has_range_index and series_.freq > 1 - ) and n.start > series_.time_index[-1]: + if ( + series_.has_range_index + and series_.freq > 1 + and n.start > series_.time_index[-1] + ): raise_log( ValueError( f"`start` index `{n.start}` is larger than the last index `{series_.time_index[-1]}` " From 344e9290fb4b7d6548b47581400c8b97dc400a34 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Thu, 29 Jun 2023 09:09:20 +0200 Subject: [PATCH 02/21] fix: improved the negative start unit test --- darts/tests/models/forecasting/test_historical_forecasts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index 40ced94671..004cb54804 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -360,6 +360,7 @@ def test_historical_forecasts_negative_start(self): forecasts = model.historical_forecasts(series=series, start=-2, retrain=False) self.assertEqual(len(forecasts), 2) + self.assertEqual(series.time_index[-2], 
forecasts.time_index[0]) def test_historical_forecasts(self): train_length = 10 From 50a2b1b20f6a0f2acb809dfff02a7eadc5f3830a Mon Sep 17 00:00:00 2001 From: madtoinou Date: Thu, 29 Jun 2023 09:25:51 +0200 Subject: [PATCH 03/21] fix: simplified the logic around exception raising --- darts/tests/test_timeseries.py | 3 -- darts/utils/utils.py | 63 ++++++++++++++-------------------- 2 files changed, 26 insertions(+), 40 deletions(-) diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py index 546466a7da..fe0982f284 100644 --- a/darts/tests/test_timeseries.py +++ b/darts/tests/test_timeseries.py @@ -115,9 +115,6 @@ def test_integer_range_indexing(self): self.assertEqual(series.get_index_at_point(101), 91) # getting index for negative idx return idx + len(ts) self.assertEqual(series.get_index_at_point(-3), 97) - # getting index for negative idx greater than the ts length - with self.assertRaises(ValueError): - series.get_index_at_point(-(len(series) + 1)) # slicing outside of the index range should return an empty ts self.assertEqual(len(series[120:125]), 0) diff --git a/darts/utils/utils.py b/darts/utils/utils.py index 0b2bf605d4..425514c53c 100644 --- a/darts/utils/utils.py +++ b/darts/utils/utils.py @@ -15,7 +15,7 @@ from tqdm.notebook import tqdm as tqdm_notebook from darts import TimeSeries -from darts.logging import get_logger, raise_if_not, raise_log +from darts.logging import get_logger, raise_if, raise_if_not, raise_log from darts.utils.timeseries_generation import generate_index try: @@ -257,47 +257,36 @@ def _historical_forecasts_general_checks(series, kwargs): logger, ) elif isinstance(n.start, (int, np.int64)): - raise_out_of_bound_exception = False - # negative index is index type and frequency independent - if n.start < 0 and np.abs(n.start) > len(series_): - raise_out_of_bound_exception = True - elif ( - series_.has_datetime_index - or (series_.has_range_index and series_.freq == 1) - ) and n.start >= len(series_): - 
raise_out_of_bound_exception = True - - if raise_out_of_bound_exception: - raise_log( - ValueError( - f"`start` index `{n.start}` is out of bounds for series of length {len(series_)} " - f"at index: {idx}." - ), - logger, - ) - if ( + raise_if( + (n.start < 0 and np.abs(n.start) > len(series_)) + or ( + ( + series_.has_datetime_index + or (series_.has_range_index and series_.freq == 1) + ) + and n.start >= len(series_) + ), + f"`start` index `{n.start}` is out of bounds for series of length {len(series_)} " + f"at index: {idx}.", + logger, + ) + raise_if( series_.has_range_index and series_.freq > 1 - and n.start > series_.time_index[-1] - ): - raise_log( - ValueError( - f"`start` index `{n.start}` is larger than the last index `{series_.time_index[-1]}` " - f"for series at index: {idx}." - ), - logger, - ) - - start = series_.get_timestamp_at_point(n.start) - if n.retrain is not False and start == series_.start_time(): - raise_log( - ValueError( - f"{start_value_msg} `{start}` is the first timestamp of the series {idx}, resulting in an " - f"empty training set." - ), + and n.start > series_.time_index[-1], + f"`start` index `{n.start}` is larger than the last index `{series_.time_index[-1]}` " + f"for series at index: {idx}.", logger, ) + start = series_.get_timestamp_at_point(n.start) + raise_if( + n.retrain is not False and start == series_.start_time(), + f"{start_value_msg} `{start}` is the first timestamp of the series {idx}, resulting in an " + f"empty training set.", + logger, + ) + # check that overlap_end and start together form a valid combination overlap_end = n.overlap_end if not overlap_end and not ( From eee099e65b922e3623e526d3be104893436f999c Mon Sep 17 00:00:00 2001 From: madtoinou Date: Fri, 11 Aug 2023 11:48:19 +0200 Subject: [PATCH 04/21] fix: instead of adding capabilities to get_index_at_point, use a new argument in historical_forecasts. 
Updated exception accordingly --- darts/models/forecasting/forecasting_model.py | 26 ++++-- darts/models/forecasting/regression_model.py | 5 +- darts/timeseries.py | 46 +---------- .../optimized_historical_forecasts.py | 6 +- darts/utils/historical_forecasts/utils.py | 80 +++++++++++-------- 5 files changed, 79 insertions(+), 84 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 311897f0eb..20e0775028 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -22,7 +22,18 @@ from collections import OrderedDict from itertools import product from random import sample -from typing import Any, BinaryIO, Callable, Dict, List, Optional, Sequence, Tuple, Union +from typing import ( + Any, + BinaryIO, + Callable, + Dict, + List, + Literal, + Optional, + Sequence, + Tuple, + Union, +) import numpy as np import pandas as pd @@ -559,7 +570,8 @@ def historical_forecasts( future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None, num_samples: int = 1, train_length: Optional[int] = None, - start: Optional[Union[pd.Timestamp, float, int, Dict]] = None, + start: Optional[Union[pd.Timestamp, float, int]] = None, + start_format: Literal["point", "index"] = "point", forecast_horizon: int = 1, stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, @@ -610,13 +622,11 @@ def historical_forecasts( `min_train_series_length`. start Optionally, the first point in time at which a prediction is computed for a future time. - This parameter supports: ``float``, ``int`` and ``pandas.Timestamp``, and ``None``. + This parameter supports: ``float``, ``int``, ``pandas.Timestamp``, and ``None``. If a ``float``, the parameter will be treated as the proportion of the time series that should lie before the first prediction point. 
If an ``int``, the parameter will be treated as an integer index to the time index of `series` that will be used as first prediction time. - If a ``dict`` defined as {"index":int, "from":"start"/"end"}, the parameter will be considered - as the relative index of the first prediction point. If a ``pandas.Timestamp``, the time stamp will be used to determine the first prediction time directly. If ``None``, the first prediction time will automatically be set to: @@ -630,6 +640,9 @@ def historical_forecasts( Note: Raises a ValueError if `start` yields a time outside the time index of `series`. Note: If `start` is outside the possible historical forecasting times, will ignore the parameter (default behavior with ``None``) and start at the first trainable/predictable point. + start_format + If set to 'index', `start` must be an integer and corresponds to the absolute position of the first point + in time at which the prediction is generated. Default: ``'point'``. forecast_horizon The forecast horizon for the predictions. 
stride @@ -800,6 +813,7 @@ def retrain_func( future_covariates=future_covariates, num_samples=num_samples, start=start, + start_format=start_format, forecast_horizon=forecast_horizon, stride=stride, overlap_end=overlap_end, @@ -878,6 +892,7 @@ def retrain_func( forecast_horizon=forecast_horizon, overlap_end=overlap_end, start=start, + start_format=start_format, show_warnings=show_warnings, ) @@ -1895,6 +1910,7 @@ def _optimized_historical_forecasts( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, + start_format: Literal["point", "index"] = "point", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 1849dd8d1c..908d44f321 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -27,7 +27,7 @@ if their static covariates do not have the same size, the shorter ones are padded with 0 valued features. 
""" from collections import OrderedDict -from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd @@ -897,6 +897,7 @@ def _optimized_historical_forecasts( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, + start_format: Literal["point", "index"] = "point", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, @@ -949,6 +950,7 @@ def _optimized_historical_forecasts( future_covariates=future_covariates, num_samples=num_samples, start=start, + start_format=start_format, forecast_horizon=forecast_horizon, stride=stride, overlap_end=overlap_end, @@ -963,6 +965,7 @@ def _optimized_historical_forecasts( future_covariates=future_covariates, num_samples=num_samples, start=start, + start_format=start_format, forecast_horizon=forecast_horizon, stride=stride, overlap_end=overlap_end, diff --git a/darts/timeseries.py b/darts/timeseries.py index 4f3cd4f8fd..ab3e3968c5 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -2056,7 +2056,7 @@ def copy(self) -> Self: return self.__class__(self._xa) def get_index_at_point( - self, point: Union[pd.Timestamp, float, int, Dict[str, int]], after=True + self, point: Union[pd.Timestamp, float, int], after=True ) -> int: """ Converts a point along the time axis index into an integer index ranging in (0, len(series)-1). @@ -2078,48 +2078,12 @@ def get_index_at_point( If an ``int`` and series is datetime-indexed, the value of `point` is returned. If an ``int`` and series is integer-indexed, the index position of `point` in the RangeIndex is returned (accounting for steps). - - If a ``dict`` {'point':int}, the integer index is retrieved starting from the end of the time - index. 
after If the provided pandas Timestamp is not in the time series index, whether to return the index of the next timestamp or the index of the previous one. """ point_index = -1 - # when passed as dict, the point is absolute, taken from the end of the time index - if isinstance(point, dict): - if set(point.keys()) != {"point"}: - raise_log( - ValueError( - "`point`, when passed as a dict, should only contain the 'point' key." - ), - logger, - ) - - point = point["point"] - if not isinstance(point, int): - raise_log( - ValueError( - f"start['point'] should be an integer, received {type(point)}." - ), - logger, - ) - - if np.abs(point) > len(self): - raise_log( - ValueError( - f"start['point'] is greater than the length of the series ({len(self)})." - ), - logger, - ) - - # convert point to a value taken from the time index - if point > 0: - point = self.time_index[len(self) - point] - else: - point = self.time_index[point + len(self)] - if isinstance(point, float): raise_if_not( 0.0 <= point <= 1.0, @@ -2139,7 +2103,7 @@ def get_index_at_point( ) raise_if_not( 0 <= point_index < len(self), - "point (int) should be a valid index in series", + f"The index corresponding to the provided point ({point}) should be a valid index in series", logger, ) elif isinstance(point, pd.Timestamp): @@ -2167,7 +2131,7 @@ def get_index_at_point( return point_index def get_timestamp_at_point( - self, point: Union[pd.Timestamp, float, int, Dict] + self, point: Union[pd.Timestamp, float, int] ) -> Union[pd.Timestamp, int]: """ Converts a point into a pandas.Timestamp (if Datetime-indexed) or into an integer (if Int64-indexed). @@ -2179,9 +2143,7 @@ def get_timestamp_at_point( In case of a `float`, the parameter will be treated as the proportion of the time series that should lie before the point. In case of `int`, the parameter will be treated as an integer index to the time index of - `series`. 
Will raise a ValueError if not a valid index in `series` - If case of `dict` {'point':int}, the point is retrieved starting from the end of the time - index. Will raise a ValueError if point is greater than the `series` length. + `series`. Will raise a ValueError if not a valid index in `series`. In case of a `pandas.Timestamp`, point will be returned as is provided that the timestamp is present in the series time index, otherwise will raise a ValueError. """ diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts.py b/darts/utils/historical_forecasts/optimized_historical_forecasts.py index 8b3a777471..c775bbd0b7 100644 --- a/darts/utils/historical_forecasts/optimized_historical_forecasts.py +++ b/darts/utils/historical_forecasts/optimized_historical_forecasts.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Sequence, Union +from typing import List, Literal, Optional, Sequence, Union import numpy as np import pandas as pd @@ -20,6 +20,7 @@ def _optimized_historical_forecasts_regression_last_points_only( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, + start_format: Literal["point", "index"] = "point", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, @@ -63,6 +64,7 @@ def _optimized_historical_forecasts_regression_last_points_only( past_covariates=past_covariates_, future_covariates=future_covariates_, start=start, + start_format=start_format, forecast_horizon=forecast_horizon, overlap_end=overlap_end, freq=freq, @@ -156,6 +158,7 @@ def _optimized_historical_forecasts_regression_all_points( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, + start_format: Literal["point", "index"] = "point", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, @@ -199,6 +202,7 @@ def _optimized_historical_forecasts_regression_all_points( 
past_covariates=past_covariates_, future_covariates=future_covariates_, start=start, + start_format=start_format, forecast_horizon=forecast_horizon, overlap_end=overlap_end, freq=freq, diff --git a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py index 6be01dc0a8..46c2f72709 100644 --- a/darts/utils/historical_forecasts/utils.py +++ b/darts/utils/historical_forecasts/utils.py @@ -1,5 +1,5 @@ from types import SimpleNamespace -from typing import Any, Callable, Optional, Tuple, Union +from typing import Any, Callable, Literal, Optional, Tuple, Union import numpy as np import pandas as pd @@ -62,13 +62,17 @@ def _historical_forecasts_general_checks(model, series, kwargs): ), logger, ) - if isinstance(n.start, float): + + if n.start_format == "index": raise_if_not( - 0.0 <= n.start <= 1.0, "`start` should be between 0.0 and 1.0.", logger + isinstance(n.start, (int, np.int64)), + f"Since `start_format='index'`, `start` should be an integer, received {type(n.start)}", + logger, ) - elif isinstance(n.start, (int, np.int64)): + + if isinstance(n.start, float): raise_if_not( - n.start >= 0, "if `start` is an integer, must be `>= 0`.", logger + 0.0 <= n.start <= 1.0, "`start` should be between 0.0 and 1.0.", logger ) # verbose error messages @@ -96,10 +100,13 @@ def _historical_forecasts_general_checks(model, series, kwargs): logger, ) elif isinstance(n.start, (int, np.int64)): - if ( - series_.has_datetime_index - or (series_.has_range_index and series_.freq == 1) - ) and n.start >= len(series_): + if (n.start_format == "index" and np.abs(n.start) >= len(series_)) or ( + ( + series_.has_datetime_index + or (series_.has_range_index and series_.freq == 1) + ) + and n.start >= len(series_) + ): raise_log( ValueError( f"`start` index `{n.start}` is out of bounds for series of length {len(series_)} " @@ -117,33 +124,12 @@ def _historical_forecasts_general_checks(model, series, kwargs): ), logger, ) - elif isinstance(n.start, dict): - if 
set(n.start.keys()) != {"point"}: - raise_log( - ValueError( - "`start`, when passed as a dict, should only contain the 'point' key." - ), - logger, - ) - if not isinstance(n.start["point"], int): - raise_log( - ValueError( - f"start['point'] should be an integer, received {type(n.start['point'])}." - ), - logger, - ) + if n.start_format == "point": + start = series_.get_timestamp_at_point(n.start) + else: + start = series_.time_index[n.start] - if np.abs(n.start["point"]) > len(series_): - raise_log( - ValueError( - f"`start` index `{n.start['point']}` is greater than the length of the series " - f"at index: {idx}." - ), - logger, - ) - - start = series_.get_timestamp_at_point(n.start) if n.retrain is not False and start == series_.start_time(): raise_log( ValueError( @@ -393,12 +379,31 @@ def _adjust_historical_forecasts_time_index( forecast_horizon: int, overlap_end: bool, start: Optional[Union[pd.Timestamp, float, int]], + start_format: Literal["point", "index"], show_warnings: bool, ) -> TimeIndex: """ Shrink the beginning and end of the historical forecasts time index based on the values of `start`, `forecast_horizon` and `overlap_end`. 
""" + + if start_format == "index": + if not isinstance(start, int): + raise_log( + ValueError( + f"Since `start_format='index'`, `start` should be an integer, received {type(start)}" + ), + logger, + ) + + if start >= len(series): + raise_log( + ValueError( + f"`start` index `{start}` is out of bounds for series of length {len(series)}" + ), + logger, + ) + # shift the end of the forecastable index based on `overlap_end`` and `forecast_horizon`` last_valid_pred_time = model._get_last_prediction_time( series, @@ -413,7 +418,10 @@ def _adjust_historical_forecasts_time_index( # when applicable, shift the start of the forecastable index based on `start` if start is not None: - start_time_ = series.get_timestamp_at_point(start) + if start_format == "point": + start_time_ = series.get_timestamp_at_point(start) + else: + start_time_ = series.time_index[start] # ignore user-defined `start` if ( not historical_forecasts_time_index[0] @@ -568,6 +576,7 @@ def _get_historical_forecast_boundaries( past_covariates: Optional[TimeSeries], future_covariates: Optional[TimeSeries], start: Optional[Union[pd.Timestamp, float, int]], + start_format: Literal["point", "index"], forecast_horizon: int, overlap_end: bool, freq: pd.DateOffset, @@ -595,6 +604,7 @@ def _get_historical_forecast_boundaries( forecast_horizon=forecast_horizon, overlap_end=overlap_end, start=start, + start_format=start_format, show_warnings=show_warnings, ) From c4fcd58e7d449071d60e62f1e8867bce94b5a2f8 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Fri, 11 Aug 2023 11:48:42 +0200 Subject: [PATCH 05/21] test: udpated tests accordingly --- .../forecasting/test_historical_forecasts.py | 84 ++++++++++++++++--- darts/tests/test_timeseries.py | 6 -- 2 files changed, 72 insertions(+), 18 deletions(-) diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index 1a57386bfd..fcc71a367e 100644 --- 
a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -374,17 +374,25 @@ def test_historical_forecasts_local_models(self): "LocalForecastingModel does not support historical forecasting with `retrain` set to `False`" ) - def test_historical_forecasts_negative_start(self): + def test_historical_forecasts_index_start(self): series = tg.sine_timeseries(length=10) model = LinearRegressionModel(lags=2) model.fit(series[:8]) + # negative index forecasts = model.historical_forecasts( - series=series, start={"point": -2}, retrain=False + series=series, start=-2, start_format="index", retrain=False ) self.assertEqual(len(forecasts), 2) - self.assertEqual(series.time_index[-2], forecasts.time_index[0]) + self.assertTrue((series.time_index[-2:] == forecasts.time_index).all()) + + # positive index + forecasts = model.historical_forecasts( + series=series, start=5, start_format="index", retrain=False + ) + self.assertEqual(len(forecasts), 5) + self.assertTrue((series.time_index[5:] == forecasts.time_index).all()) def test_historical_forecasts(self): train_length = 10 @@ -563,7 +571,7 @@ def test_sanity_check_invalid_start(self): rangeidx_step1 = tg.linear_timeseries(start=0, length=10, freq=1) rangeidx_step2 = tg.linear_timeseries(start=0, length=10, freq=2) - # index too large + # point (int) too large with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts(timeidx_, start=11) assert str(msg.value).startswith("`start` index `11` is out of bounds") @@ -574,26 +582,32 @@ def test_sanity_check_invalid_start(self): LinearRegressionModel(lags=1).historical_forecasts(rangeidx_step2, start=11) assert str(msg.value).startswith("The provided point is not a valid index") - # value too low + # point (int) too low with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=timeidx_.start_time() - timeidx_.freq + rangeidx_step1, 
start=rangeidx_step1.start_time() - rangeidx_step1.freq ) assert str(msg.value).startswith( - "`start` time `1999-12-31 00:00:00` is before the first timestamp `2000-01-01 00:00:00`" + "The index corresponding to the provided point (" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step1, start=rangeidx_step1.start_time() - rangeidx_step1.freq + rangeidx_step2, start=rangeidx_step2.start_time() - rangeidx_step2.freq ) - assert str(msg.value).startswith("if `start` is an integer, must be `>= 0`") + assert str(msg.value).startswith( + "The index corresponding to the provided point (" + ) + + # point (timestamp) too low with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step2, start=rangeidx_step2.start_time() - rangeidx_step2.freq + timeidx_, start=timeidx_.start_time() - timeidx_.freq ) - assert str(msg.value).startswith("if `start` is an integer, must be `>= 0`") + assert str(msg.value).startswith( + "`start` time `1999-12-31 00:00:00` is before the first timestamp `2000-01-01 00:00:00`" + ) - # value too high + # point (timestamp) too high with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( timeidx_, start=timeidx_.end_time() + timeidx_.freq @@ -614,6 +628,52 @@ def test_sanity_check_invalid_start(self): "`start` index `20` is larger than the last index `18`" ) + # index too high when start_format = 'index' + with pytest.raises(ValueError) as msg: + LinearRegressionModel(lags=1).historical_forecasts( + timeidx_, start=11, start_format="index" + ) + assert str(msg.value).startswith( + "`start` index `11` is out of bounds for series of length 10" + ) + with pytest.raises(ValueError) as msg: + LinearRegressionModel(lags=1).historical_forecasts( + rangeidx_step1, start=11, start_format="index" + ) + assert str(msg.value).startswith( + "`start` index `11` is out of bounds for series of length 10" + ) + with 
pytest.raises(ValueError) as msg: + LinearRegressionModel(lags=1).historical_forecasts( + rangeidx_step2, start=11, start_format="index" + ) + assert str(msg.value).startswith( + "`start` index `11` is out of bounds for series of length 10" + ) + + # index too high (negative) when start_format = 'index' + with pytest.raises(ValueError) as msg: + LinearRegressionModel(lags=1).historical_forecasts( + timeidx_, start=-11, start_format="index" + ) + assert str(msg.value).startswith( + "`start` index `-11` is out of bounds for series of length 10" + ) + with pytest.raises(ValueError) as msg: + LinearRegressionModel(lags=1).historical_forecasts( + rangeidx_step1, start=-11, start_format="index" + ) + assert str(msg.value).startswith( + "`start` index `-11` is out of bounds for series of length 10" + ) + with pytest.raises(ValueError) as msg: + LinearRegressionModel(lags=1).historical_forecasts( + rangeidx_step2, start=-11, start_format="index" + ) + assert str(msg.value).startswith( + "`start` index `-11` is out of bounds for series of length 10" + ) + def test_regression_auto_start_multiple_no_cov(self): train_length = 15 forecast_horizon = 10 diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py index c4fd6d3646..f2e0eeccef 100644 --- a/darts/tests/test_timeseries.py +++ b/darts/tests/test_timeseries.py @@ -117,8 +117,6 @@ def test_integer_range_indexing(self): # getting index for idx should return i s.t., series[i].time == idx self.assertEqual(series.get_index_at_point(101), 91) - # getting index for negative idx return idx + len(ts) - self.assertEqual(series.get_index_at_point({"point": -3}), 97) # slicing outside of the index range should return an empty ts self.assertEqual(len(series[120:125]), 0) @@ -136,8 +134,6 @@ def test_integer_range_indexing(self): # getting index for idx should return i s.t., series[i].time == idx self.assertEqual(series.get_index_at_point(100), 50) - # getting index for negative idx return idx + len(ts) - 
self.assertEqual(series.get_index_at_point({"point": -1}), 99) # getting index outside of the index range should raise an exception with self.assertRaises(IndexError): @@ -166,8 +162,6 @@ def test_integer_range_indexing(self): # getting index for idx should return i s.t., series[i].time == idx self.assertEqual(series.get_index_at_point(16), 3) - # getting index for negative idx return idx + len(ts) - self.assertEqual(series.get_index_at_point({"point": -2}), 8) def test_integer_indexing(self): n = 10 From eef40894c4b796d6b7fcecf2b0d4e3c2e46010eb Mon Sep 17 00:00:00 2001 From: madtoinou Date: Fri, 11 Aug 2023 11:51:53 +0200 Subject: [PATCH 06/21] doc: updated changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03517933b4..77dd076044 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ but cannot always guarantee backwards compatibility. Changes that may **break co ### For users of the library: +**Improvement** +- `TimeSeries` with a `PeriodIndex` and a negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). +- Added a new argument `start_format` to `historical_forecasts`, `start` can now be provided as an absolute index (positive or negative). [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). + **Fixed** - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou). - Fixed a bug in `RegressionEnsembleModel.extreme_lags` when the forecasting models have only covariates lags. [#1942](https://github.com/unit8co/darts/pull/1942) by [Antoine Madrona](https://github.com/madtoinou). 
From a7271523ea48eafdd622f60c2a5e40f9fc28111f Mon Sep 17 00:00:00 2001 From: madtoinou Date: Fri, 11 Aug 2023 12:01:59 +0200 Subject: [PATCH 07/21] test: added test for historical forecast on ts using a rangeindex starting with a negative value --- CHANGELOG.md | 4 ++-- .../forecasting/test_historical_forecasts.py | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77dd076044..afe3436e67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,8 @@ but cannot always guarantee backwards compatibility. Changes that may **break co ### For users of the library: **Improvement** -- `TimeSeries` with a `PeriodIndex` and a negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). -- Added a new argument `start_format` to `historical_forecasts`, `start` can now be provided as an absolute index (positive or negative). [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). +- `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). +- Added a new argument `start_format` to `historical_forecasts`, `start` can now be provided as an absolute index (positive or negative) instead of a point of the time index. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). **Fixed** - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou). 
diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index fcc71a367e..b71e51d712 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -394,6 +394,28 @@ def test_historical_forecasts_index_start(self): self.assertEqual(len(forecasts), 5) self.assertTrue((series.time_index[5:] == forecasts.time_index).all()) + def test_historical_forecasts_negative_rangeindex(self): + series = TimeSeries.from_times_and_values( + times=pd.RangeIndex(start=-5, stop=5, step=1), values=np.arange(10) + ) + + model = LinearRegressionModel(lags=2) + model.fit(series[:8]) + + # start as point + forecasts = model.historical_forecasts( + series=series, start=-2, start_format="point", retrain=False + ) + self.assertEqual(len(forecasts), 7) + self.assertTrue((series.time_index[-7:] == forecasts.time_index).all()) + + # start as index + forecasts = model.historical_forecasts( + series=series, start=-2, start_format="index", retrain=False + ) + self.assertEqual(len(forecasts), 2) + self.assertTrue((series.time_index[-2:] == forecasts.time_index).all()) + def test_historical_forecasts(self): train_length = 10 forecast_horizon = 8 From c1cccc3b7a76e8e3c2e980952d3ba4f20b1e6c59 Mon Sep 17 00:00:00 2001 From: madtoinou <32447896+madtoinou@users.noreply.github.com> Date: Fri, 11 Aug 2023 16:32:17 +0200 Subject: [PATCH 08/21] Apply suggestions from code review Co-authored-by: Dennis Bader --- darts/timeseries.py | 2 +- darts/utils/historical_forecasts/utils.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/darts/timeseries.py b/darts/timeseries.py index ab3e3968c5..2c54268d2c 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -2064,7 +2064,7 @@ def get_index_at_point( Parameters ---------- point - This parameter supports 4 different data types: ``pd.Timestamp``, ``float``, ``int`` and ``dict``. 
+ This parameter supports 3 different data types: ``pd.Timestamp``, ``float`` and ``int``. ``pd.Timestamp`` work only on series that are indexed with a ``pd.DatetimeIndex``. In such cases, the returned point will be the index of this timestamp if it is present in the series time index. diff --git a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py index 46c2f72709..ed83673394 100644 --- a/darts/utils/historical_forecasts/utils.py +++ b/darts/utils/historical_forecasts/utils.py @@ -55,10 +55,10 @@ def _historical_forecasts_general_checks(model, series, kwargs): if n.start is not None: # check start parameter in general (non series dependent) - if not isinstance(n.start, (float, int, np.int64, pd.Timestamp, dict)): + if not isinstance(n.start, (float, int, np.int64, pd.Timestamp)): raise_log( TypeError( - "`start` needs to be either `float`, `int`, `pd.Timestamp`, `dict`, or `None`" + "`start` must be either `float`, `int`, `pd.Timestamp` or `None`" ), logger, ) @@ -66,13 +66,13 @@ def _historical_forecasts_general_checks(model, series, kwargs): if n.start_format == "index": raise_if_not( isinstance(n.start, (int, np.int64)), - f"Since `start_format='index'`, `start` should be an integer, received {type(n.start)}", + f"Since `start_format='index'`, `start` must be an integer, received {type(n.start)}", logger, ) if isinstance(n.start, float): raise_if_not( - 0.0 <= n.start <= 1.0, "`start` should be between 0.0 and 1.0.", logger + 0.0 <= n.start <= 1.0, "if `start` is a float, must be between 0.0 and 1.0.", logger ) # verbose error messages From 7beb2a6c61822ac449cfd820d924993a994aa2cc Mon Sep 17 00:00:00 2001 From: madtoinou Date: Fri, 11 Aug 2023 17:19:06 +0200 Subject: [PATCH 09/21] fix: changed the literal to 'positional_index' and 'value_index' --- CHANGELOG.md | 2 +- darts/models/forecasting/forecasting_model.py | 20 ++++++-- darts/models/forecasting/regression_model.py | 2 +- .../forecasting/test_historical_forecasts.py | 37 
+++++++------- .../optimized_historical_forecasts.py | 4 +- darts/utils/historical_forecasts/utils.py | 49 ++++++++----------- 6 files changed, 58 insertions(+), 56 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index afe3436e67..861de87aaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Improvement** - `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). -- Added a new argument `start_format` to `historical_forecasts`, `start` can now be provided as an absolute index (positive or negative) instead of a point of the time index. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). +- 🔴 Added a new argument `start_format` to `historical_forecasts`, `start` can now be provided as a 'positional index' (positive or negative) or a point of the time index ('value index', default behavior). [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). **Fixed** - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou). 
diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 20e0775028..830761f0a9 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -571,7 +571,7 @@ def historical_forecasts( num_samples: int = 1, train_length: Optional[int] = None, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["point", "index"] = "point", + start_format: Literal["positional_index", "value_index"] = "value_index", forecast_horizon: int = 1, stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, @@ -641,8 +641,9 @@ def historical_forecasts( Note: If `start` is outside the possible historical forecasting times, will ignore the parameter (default behavior with ``None``) and start at the first trainable/predictable point. start_format - If set to 'index', `start` must be an integer and corresponds to the absolute position of the first point - in time at which the prediction is generated. Default: ``'point'``. + If set to 'positional_index', `start` must be an ``int`` corresponding to the position of the first + predicted point. If set to 'value_index', `start` must be a ``float`` or an element from the time index. + Default: ``'value_index'``. forecast_horizon The forecast horizon for the predictions. stride @@ -1047,6 +1048,7 @@ def backtest( num_samples: int = 1, train_length: Optional[int] = None, start: Optional[Union[pd.Timestamp, float, int]] = None, + # start_format: Literal["positional_index", "value_index"] = "value_index", forecast_horizon: int = 1, stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, @@ -1121,6 +1123,10 @@ def backtest( Note: Raises a ValueError if `start` yields a time outside the time index of `series`. Note: If `start` is outside the possible historical forecasting times, will ignore the parameter (default behavior with ``None``) and start at the first trainable/predictable point. 
+ start_format + If set to 'positional_index', `start` must be an ``int`` corresponding to the position of the first + predicted point. If set to 'value_index', `start` must be a ``float`` or an element from the time index. + Default: ``'value_index'``. forecast_horizon The forecast horizon for the point predictions. stride @@ -1177,6 +1183,7 @@ def backtest( num_samples=num_samples, train_length=train_length, start=start, + # start_format=start_format, forecast_horizon=forecast_horizon, stride=stride, retrain=retrain, @@ -1227,6 +1234,7 @@ def gridsearch( forecast_horizon: Optional[int] = None, stride: int = 1, start: Union[pd.Timestamp, float, int] = 0.5, + # start_format: Literal["positional_index", "value_index"] = "value_index", last_points_only: bool = False, show_warnings: bool = True, val_series: Optional[TimeSeries] = None, @@ -1298,6 +1306,9 @@ def gridsearch( of `series` from which predictions will be made to evaluate the model. For a detailed description of how the different data types are interpreted, please see the documentation for `ForecastingModel.backtest`. Only used in expanding window mode. + start_format + The format of the start parameter, either 'positional_index' or 'value_index'. + For a detailed description this argument, please see the documentation for `ForecastingModel.backtest`. last_points_only Whether to use the whole forecasts or only the last point of each forecast to compute the error. Only used in expanding window mode. 
@@ -1403,6 +1414,7 @@ def _evaluate_combination(param_combination) -> float: future_covariates=future_covariates, num_samples=1, start=start, + # start_format=start_format, forecast_horizon=forecast_horizon, stride=stride, metric=metric, @@ -1910,7 +1922,7 @@ def _optimized_historical_forecasts( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["point", "index"] = "point", + start_format: Literal["positional_index", "value_index"] = "value_index", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 908d44f321..7c20cc67fa 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -897,7 +897,7 @@ def _optimized_historical_forecasts( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["point", "index"] = "point", + start_format: Literal["positional_index", "value_index"] = "value_index", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index b71e51d712..bb388d73fc 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -374,25 +374,24 @@ def test_historical_forecasts_local_models(self): "LocalForecastingModel does not support historical forecasting with `retrain` set to `False`" ) - def test_historical_forecasts_index_start(self): + def test_historical_forecasts_positional_index_start(self): series = tg.sine_timeseries(length=10) model = LinearRegressionModel(lags=2) model.fit(series[:8]) # negative index - forecasts = 
model.historical_forecasts( - series=series, start=-2, start_format="index", retrain=False + forecasts_neg = model.historical_forecasts( + series=series, start=-2, start_format="positional_index", retrain=False ) - self.assertEqual(len(forecasts), 2) - self.assertTrue((series.time_index[-2:] == forecasts.time_index).all()) + self.assertEqual(len(forecasts_neg), 2) + self.assertTrue((series.time_index[-2:] == forecasts_neg.time_index).all()) # positive index - forecasts = model.historical_forecasts( - series=series, start=5, start_format="index", retrain=False + forecasts_pos = model.historical_forecasts( + series=series, start=8, start_format="positional_index", retrain=False ) - self.assertEqual(len(forecasts), 5) - self.assertTrue((series.time_index[5:] == forecasts.time_index).all()) + self.assertEqual(forecasts_pos, forecasts_neg) def test_historical_forecasts_negative_rangeindex(self): series = TimeSeries.from_times_and_values( @@ -404,14 +403,14 @@ def test_historical_forecasts_negative_rangeindex(self): # start as point forecasts = model.historical_forecasts( - series=series, start=-2, start_format="point", retrain=False + series=series, start=-2, start_format="value_index", retrain=False ) self.assertEqual(len(forecasts), 7) self.assertTrue((series.time_index[-7:] == forecasts.time_index).all()) # start as index forecasts = model.historical_forecasts( - series=series, start=-2, start_format="index", retrain=False + series=series, start=-2, start_format="positional_index", retrain=False ) self.assertEqual(len(forecasts), 2) self.assertTrue((series.time_index[-2:] == forecasts.time_index).all()) @@ -650,47 +649,47 @@ def test_sanity_check_invalid_start(self): "`start` index `20` is larger than the last index `18`" ) - # index too high when start_format = 'index' + # index too high when start_format = 'positional_index' with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=11, start_format="index" + 
timeidx_, start=11, start_format="positional_index" ) assert str(msg.value).startswith( "`start` index `11` is out of bounds for series of length 10" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step1, start=11, start_format="index" + rangeidx_step1, start=11, start_format="positional_index" ) assert str(msg.value).startswith( "`start` index `11` is out of bounds for series of length 10" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step2, start=11, start_format="index" + rangeidx_step2, start=11, start_format="positional_index" ) assert str(msg.value).startswith( "`start` index `11` is out of bounds for series of length 10" ) - # index too high (negative) when start_format = 'index' + # index too high (negative) when start_format = 'positional_index' with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=-11, start_format="index" + timeidx_, start=-11, start_format="positional_index" ) assert str(msg.value).startswith( "`start` index `-11` is out of bounds for series of length 10" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step1, start=-11, start_format="index" + rangeidx_step1, start=-11, start_format="positional_index" ) assert str(msg.value).startswith( "`start` index `-11` is out of bounds for series of length 10" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step2, start=-11, start_format="index" + rangeidx_step2, start=-11, start_format="positional_index" ) assert str(msg.value).startswith( "`start` index `-11` is out of bounds for series of length 10" diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts.py b/darts/utils/historical_forecasts/optimized_historical_forecasts.py index c775bbd0b7..ffaf71099d 100644 --- 
a/darts/utils/historical_forecasts/optimized_historical_forecasts.py +++ b/darts/utils/historical_forecasts/optimized_historical_forecasts.py @@ -20,7 +20,7 @@ def _optimized_historical_forecasts_regression_last_points_only( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["point", "index"] = "point", + start_format: Literal["positional_index", "value_index"] = "value_index", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, @@ -158,7 +158,7 @@ def _optimized_historical_forecasts_regression_all_points( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["point", "index"] = "point", + start_format: Literal["positional_index", "value_index"] = "value_index", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, diff --git a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py index ed83673394..5a680d04b4 100644 --- a/darts/utils/historical_forecasts/utils.py +++ b/darts/utils/historical_forecasts/utils.py @@ -63,7 +63,7 @@ def _historical_forecasts_general_checks(model, series, kwargs): logger, ) - if n.start_format == "index": + if n.start_format == "positional_index": raise_if_not( isinstance(n.start, (int, np.int64)), f"Since `start_format='index'`, `start` must be an integer, received {type(n.start)}", @@ -72,7 +72,9 @@ def _historical_forecasts_general_checks(model, series, kwargs): if isinstance(n.start, float): raise_if_not( - 0.0 <= n.start <= 1.0, "if `start` is a float, must be between 0.0 and 1.0.", logger + 0.0 <= n.start <= 1.0, + "if `start` is a float, must be between 0.0 and 1.0.", + logger, ) # verbose error messages @@ -100,13 +102,20 @@ def _historical_forecasts_general_checks(model, series, kwargs): logger, ) elif isinstance(n.start, (int, np.int64)): - if 
(n.start_format == "index" and np.abs(n.start) >= len(series_)) or ( - ( + out_of_bound_error = False + if n.start_format == "positional_index": + if (n.start > 0 and n.start >= len(series_)) or ( + n.start < 0 and np.abs(n.start) > len(series_) + ): + out_of_bound_error = True + else: + if ( series_.has_datetime_index or (series_.has_range_index and series_.freq == 1) - ) - and n.start >= len(series_) - ): + ) and n.start >= len(series_): + out_of_bound_error = True + + if out_of_bound_error: raise_log( ValueError( f"`start` index `{n.start}` is out of bounds for series of length {len(series_)} " @@ -125,7 +134,7 @@ def _historical_forecasts_general_checks(model, series, kwargs): logger, ) - if n.start_format == "point": + if n.start_format == "value_index": start = series_.get_timestamp_at_point(n.start) else: start = series_.time_index[n.start] @@ -379,31 +388,13 @@ def _adjust_historical_forecasts_time_index( forecast_horizon: int, overlap_end: bool, start: Optional[Union[pd.Timestamp, float, int]], - start_format: Literal["point", "index"], + start_format: Literal["positional_index", "value_index"], show_warnings: bool, ) -> TimeIndex: """ Shrink the beginning and end of the historical forecasts time index based on the values of `start`, `forecast_horizon` and `overlap_end`. 
""" - - if start_format == "index": - if not isinstance(start, int): - raise_log( - ValueError( - f"Since `start_format='index'`, `start` should be an integer, received {type(start)}" - ), - logger, - ) - - if start >= len(series): - raise_log( - ValueError( - f"`start` index `{start}` is out of bounds for series of length {len(series)}" - ), - logger, - ) - # shift the end of the forecastable index based on `overlap_end`` and `forecast_horizon`` last_valid_pred_time = model._get_last_prediction_time( series, @@ -418,7 +409,7 @@ def _adjust_historical_forecasts_time_index( # when applicable, shift the start of the forecastable index based on `start` if start is not None: - if start_format == "point": + if start_format == "value_index": start_time_ = series.get_timestamp_at_point(start) else: start_time_ = series.time_index[start] @@ -576,7 +567,7 @@ def _get_historical_forecast_boundaries( past_covariates: Optional[TimeSeries], future_covariates: Optional[TimeSeries], start: Optional[Union[pd.Timestamp, float, int]], - start_format: Literal["point", "index"], + start_format: Literal["positional_index", "value_index"], forecast_horizon: int, overlap_end: bool, freq: pd.DateOffset, From 4e40275abfa8fd436cd6c97c8810b79654eddf77 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Fri, 11 Aug 2023 18:00:07 +0200 Subject: [PATCH 10/21] feat: making the error messages more informative, adapted the tests accordingly --- .../forecasting/test_historical_forecasts.py | 79 +++++++------------ darts/utils/historical_forecasts/utils.py | 27 ++++--- 2 files changed, 45 insertions(+), 61 deletions(-) diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index bb388d73fc..8cf9193fb2 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -592,43 +592,26 @@ def test_sanity_check_invalid_start(self): rangeidx_step1 = 
tg.linear_timeseries(start=0, length=10, freq=1) rangeidx_step2 = tg.linear_timeseries(start=0, length=10, freq=2) - # point (int) too large + # label_index (int), too large with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts(timeidx_, start=11) assert str(msg.value).startswith("`start` index `11` is out of bounds") - with pytest.raises(ValueError) as msg: - LinearRegressionModel(lags=1).historical_forecasts(rangeidx_step1, start=11) - assert str(msg.value).startswith("`start` index `11` is out of bounds") - with pytest.raises(ValueError) as msg: - LinearRegressionModel(lags=1).historical_forecasts(rangeidx_step2, start=11) - assert str(msg.value).startswith("The provided point is not a valid index") - - # point (int) too low - with pytest.raises(ValueError) as msg: - LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step1, start=rangeidx_step1.start_time() - rangeidx_step1.freq - ) - assert str(msg.value).startswith( - "The index corresponding to the provided point (" - ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step2, start=rangeidx_step2.start_time() - rangeidx_step2.freq + rangeidx_step1, start=rangeidx_step1.end_time() + rangeidx_step1.freq ) assert str(msg.value).startswith( - "The index corresponding to the provided point (" + "`start` index `10` is larger than the last index" ) - - # point (timestamp) too low with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=timeidx_.start_time() - timeidx_.freq + rangeidx_step2, start=rangeidx_step2.end_time() + rangeidx_step2.freq ) assert str(msg.value).startswith( - "`start` time `1999-12-31 00:00:00` is before the first timestamp `2000-01-01 00:00:00`" + "`start` index `20` is larger than the last index" ) - # point (timestamp) too high + # label_index (timestamp) too high with pytest.raises(ValueError) as msg: 
LinearRegressionModel(lags=1).historical_forecasts( timeidx_, start=timeidx_.end_time() + timeidx_.freq @@ -636,60 +619,58 @@ def test_sanity_check_invalid_start(self): assert str(msg.value).startswith( "`start` time `2000-01-11 00:00:00` is after the last timestamp `2000-01-10 00:00:00`" ) + + # label_index, invalid with pytest.raises(ValueError) as msg: - LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step1, start=rangeidx_step1.end_time() + rangeidx_step1.freq - ) - assert str(msg.value).startswith("`start` index `10` is out of bounds") - with pytest.raises(ValueError) as msg: - LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step2, start=rangeidx_step2.end_time() + rangeidx_step2.freq - ) - assert str(msg.value).startswith( - "`start` index `20` is larger than the last index `18`" - ) + LinearRegressionModel(lags=1).historical_forecasts(rangeidx_step2, start=11) + assert str(msg.value).startswith("The provided point is not a valid index") - # index too high when start_format = 'positional_index' + # label_index, too low with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=11, start_format="positional_index" + timeidx_, start=timeidx_.start_time() - timeidx_.freq ) assert str(msg.value).startswith( - "`start` index `11` is out of bounds for series of length 10" + "`start` time `1999-12-31 00:00:00` is before the first timestamp `2000-01-01 00:00:00`" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step1, start=11, start_format="positional_index" + rangeidx_step1, start=rangeidx_step1.start_time() - rangeidx_step1.freq ) assert str(msg.value).startswith( - "`start` index `11` is out of bounds for series of length 10" + "`start` index `-1` is smaller than the first index `0`" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step2, start=11, 
start_format="positional_index" + rangeidx_step2, start=rangeidx_step2.start_time() - rangeidx_step2.freq ) assert str(msg.value).startswith( - "`start` index `11` is out of bounds for series of length 10" + "`start` index `-2` is smaller than the first index `0`" + ) + + # positional_index, predicting only the last position + LinearRegressionModel(lags=1).historical_forecasts( + timeidx_, start=9, start_format="positional_index" ) - # index too high (negative) when start_format = 'positional_index' + # positional_index, predicting from the first position with retrain=True with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=-11, start_format="positional_index" + timeidx_, start=-10, start_format="positional_index" ) - assert str(msg.value).startswith( - "`start` index `-11` is out of bounds for series of length 10" - ) + assert str(msg.value).endswith(", resulting in an empty training set.") + + # positional_index, beyond boundaries with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step1, start=-11, start_format="positional_index" + timeidx_, start=10, start_format="positional_index" ) assert str(msg.value).startswith( - "`start` index `-11` is out of bounds for series of length 10" + "`start` index `10` is out of bounds for series of length 10" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - rangeidx_step2, start=-11, start_format="positional_index" + timeidx_, start=-11, start_format="positional_index" ) assert str(msg.value).startswith( "`start` index `-11` is out of bounds for series of length 10" diff --git a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py index 5a680d04b4..3c3785cade 100644 --- a/darts/utils/historical_forecasts/utils.py +++ b/darts/utils/historical_forecasts/utils.py @@ -108,24 +108,18 @@ def _historical_forecasts_general_checks(model, series, 
kwargs): n.start < 0 and np.abs(n.start) > len(series_) ): out_of_bound_error = True - else: - if ( - series_.has_datetime_index - or (series_.has_range_index and series_.freq == 1) - ) and n.start >= len(series_): + elif series_.has_datetime_index: + if n.start >= len(series_): out_of_bound_error = True - - if out_of_bound_error: + elif n.start < series_.time_index[0]: raise_log( ValueError( - f"`start` index `{n.start}` is out of bounds for series of length {len(series_)} " - f"at index: {idx}." + f"`start` index `{n.start}` is smaller than the first index `{series_.time_index[0]}` " + f"for series at index: {idx}." ), logger, ) - elif ( - series_.has_range_index and series_.freq > 1 - ) and n.start > series_.time_index[-1]: + elif n.start > series_.time_index[-1]: raise_log( ValueError( f"`start` index `{n.start}` is larger than the last index `{series_.time_index[-1]}` " @@ -134,6 +128,15 @@ def _historical_forecasts_general_checks(model, series, kwargs): logger, ) + if out_of_bound_error: + raise_log( + ValueError( + f"`start` index `{n.start}` is out of bounds for series of length {len(series_)} " + f"at index: {idx}." + ), + logger, + ) + if n.start_format == "value_index": start = series_.get_timestamp_at_point(n.start) else: From 90b2e62e21ffe98df95dd81024a855fcd71fbd43 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Fri, 11 Aug 2023 18:55:45 +0200 Subject: [PATCH 11/21] feat: extending the new argument to backtest and gridsearch --- CHANGELOG.md | 2 +- darts/models/forecasting/forecasting_model.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 861de87aaa..1e23c920ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Improvement** - `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. 
[#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). -- 🔴 Added a new argument `start_format` to `historical_forecasts`, `start` can now be provided as a 'positional index' (positive or negative) or a point of the time index ('value index', default behavior). [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). +- 🔴 Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch` so that `start` can be provided as a 'positional index' (positive or negative) or a point of the time index ('value index', default behavior). [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). **Fixed** - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou). diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 830761f0a9..0ecbba8a84 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1048,7 +1048,7 @@ def backtest( num_samples: int = 1, train_length: Optional[int] = None, start: Optional[Union[pd.Timestamp, float, int]] = None, - # start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["positional_index", "value_index"] = "value_index", forecast_horizon: int = 1, stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, @@ -1183,7 +1183,7 @@ def backtest( num_samples=num_samples, train_length=train_length, start=start, - # start_format=start_format, + start_format=start_format, forecast_horizon=forecast_horizon, stride=stride, retrain=retrain, @@ -1234,7 +1234,7 @@ def gridsearch( forecast_horizon: Optional[int] = None, stride: int = 1, start: Union[pd.Timestamp, float, int] = 
0.5, - # start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["positional_index", "value_index"] = "value_index", last_points_only: bool = False, show_warnings: bool = True, val_series: Optional[TimeSeries] = None, @@ -1414,7 +1414,7 @@ def _evaluate_combination(param_combination) -> float: future_covariates=future_covariates, num_samples=1, start=start, - # start_format=start_format, + start_format=start_format, forecast_horizon=forecast_horizon, stride=stride, metric=metric, From ce4b669351ecd74074ddfc2c89b790d680ee531f Mon Sep 17 00:00:00 2001 From: madtoinou Date: Mon, 14 Aug 2023 11:44:21 +0200 Subject: [PATCH 12/21] fix: import of Literal for python 3.8 --- darts/models/forecasting/forecasting_model.py | 18 ++++++------------ darts/models/forecasting/regression_model.py | 7 ++++++- .../optimized_historical_forecasts.py | 7 ++++++- darts/utils/historical_forecasts/utils.py | 7 ++++++- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 0ecbba8a84..43c34eb68d 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -22,18 +22,12 @@ from collections import OrderedDict from itertools import product from random import sample -from typing import ( - Any, - BinaryIO, - Callable, - Dict, - List, - Literal, - Optional, - Sequence, - Tuple, - Union, -) +from typing import Any, BinaryIO, Callable, Dict, List, Optional, Sequence, Tuple, Union + +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal import numpy as np import pandas as pd diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 7c20cc67fa..1371bdd147 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -27,7 +27,12 @@ if their static covariates 
do not have the same size, the shorter ones are padded with 0 valued features. """ from collections import OrderedDict -from typing import Any, Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union + +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal import numpy as np import pandas as pd diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts.py b/darts/utils/historical_forecasts/optimized_historical_forecasts.py index ffaf71099d..661541a833 100644 --- a/darts/utils/historical_forecasts/optimized_historical_forecasts.py +++ b/darts/utils/historical_forecasts/optimized_historical_forecasts.py @@ -1,4 +1,9 @@ -from typing import List, Literal, Optional, Sequence, Union +from typing import List, Optional, Sequence, Union + +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal import numpy as np import pandas as pd diff --git a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py index 3c3785cade..b06fc2db7f 100644 --- a/darts/utils/historical_forecasts/utils.py +++ b/darts/utils/historical_forecasts/utils.py @@ -1,5 +1,10 @@ from types import SimpleNamespace -from typing import Any, Callable, Literal, Optional, Tuple, Union +from typing import Any, Callable, Optional, Tuple, Union + +try: + from typing import Literal +except ImportError: + from typing_extensions import Literal import numpy as np import pandas as pd From 292af5494053edf9c4c14b59b7a3ae17a6d96431 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Mon, 14 Aug 2023 11:56:50 +0200 Subject: [PATCH 13/21] doc: updated changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e23c920ff..bb05974336 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ but cannot always guarantee backwards compatibility. 
Changes that may **break co **Improvement** - `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). -- 🔴 Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch` so that `start` can be provided as a 'positional index' (positive or negative) or a point of the time index ('value index', default behavior). [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). +- Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch` so that integer `start` can be used a position or a value of the time index. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). **Fixed** - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou). 
From 86c3b84b9ea20c31bd38c6dd94e25b80471a2181 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Mon, 14 Aug 2023 12:22:31 +0200 Subject: [PATCH 14/21] fix: shortened the literal for start_format, updated tests accordingly --- darts/models/forecasting/forecasting_model.py | 28 +++++++++++-------- darts/models/forecasting/regression_model.py | 2 +- .../forecasting/test_historical_forecasts.py | 18 ++++++------ .../optimized_historical_forecasts.py | 4 +-- darts/utils/historical_forecasts/utils.py | 12 ++++---- 5 files changed, 34 insertions(+), 30 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 43c34eb68d..16bb505fce 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -565,7 +565,7 @@ def historical_forecasts( num_samples: int = 1, train_length: Optional[int] = None, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["position", "value"] = "value", forecast_horizon: int = 1, stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, @@ -635,9 +635,12 @@ def historical_forecasts( Note: If `start` is outside the possible historical forecasting times, will ignore the parameter (default behavior with ``None``) and start at the first trainable/predictable point. start_format - If set to 'positional_index', `start` must be an ``int`` corresponding to the position of the first - predicted point. If set to 'value_index', `start` must be a ``float`` or an element from the time index. - Default: ``'value_index'``. + Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a + `pd.RangeIndex`. + If set to 'position', `start` corresponds to the index position of the first predicted point and can range + from `(-len(series), len(series) - 1)`. 
+ If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise + an error if the value is not in `series`' index. Default: ``'value'`` forecast_horizon The forecast horizon for the predictions. stride @@ -1042,7 +1045,7 @@ def backtest( num_samples: int = 1, train_length: Optional[int] = None, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["position", "value"] = "value", forecast_horizon: int = 1, stride: int = 1, retrain: Union[bool, int, Callable[..., bool]] = True, @@ -1118,9 +1121,9 @@ def backtest( Note: If `start` is outside the possible historical forecasting times, will ignore the parameter (default behavior with ``None``) and start at the first trainable/predictable point. start_format - If set to 'positional_index', `start` must be an ``int`` corresponding to the position of the first - predicted point. If set to 'value_index', `start` must be a ``float`` or an element from the time index. - Default: ``'value_index'``. + Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a + `pd.RangeIndex`. For a detailed description this argument, please see the documentation for + `ForecastingModel.historical_forecasts`. forecast_horizon The forecast horizon for the point predictions. stride @@ -1228,7 +1231,7 @@ def gridsearch( forecast_horizon: Optional[int] = None, stride: int = 1, start: Union[pd.Timestamp, float, int] = 0.5, - start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["position", "value"] = "value", last_points_only: bool = False, show_warnings: bool = True, val_series: Optional[TimeSeries] = None, @@ -1299,10 +1302,11 @@ def gridsearch( The ``int``, ``float`` or ``pandas.Timestamp`` that represents the starting point in the time index of `series` from which predictions will be made to evaluate the model. 
For a detailed description of how the different data types are interpreted, please see the documentation - for `ForecastingModel.backtest`. Only used in expanding window mode. + for `ForecastingModel.historical_forecasts`. Only used in expanding window mode. start_format - The format of the start parameter, either 'positional_index' or 'value_index'. - For a detailed description this argument, please see the documentation for `ForecastingModel.backtest`. + Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a + `pd.RangeIndex`. For a detailed description this argument, please see the documentation for + `ForecastingModel.historical_forecasts`. last_points_only Whether to use the whole forecasts or only the last point of each forecast to compute the error. Only used in expanding window mode. diff --git a/darts/models/forecasting/regression_model.py b/darts/models/forecasting/regression_model.py index 1371bdd147..f51c26e902 100644 --- a/darts/models/forecasting/regression_model.py +++ b/darts/models/forecasting/regression_model.py @@ -902,7 +902,7 @@ def _optimized_historical_forecasts( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["position", "value"] = "value", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index 8cf9193fb2..9c99a0fa74 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -374,7 +374,7 @@ def test_historical_forecasts_local_models(self): "LocalForecastingModel does not support historical forecasting with `retrain` set to `False`" ) - def 
test_historical_forecasts_positional_index_start(self): + def test_historical_forecasts_position_start(self): series = tg.sine_timeseries(length=10) model = LinearRegressionModel(lags=2) @@ -382,14 +382,14 @@ def test_historical_forecasts_positional_index_start(self): # negative index forecasts_neg = model.historical_forecasts( - series=series, start=-2, start_format="positional_index", retrain=False + series=series, start=-2, start_format="position", retrain=False ) self.assertEqual(len(forecasts_neg), 2) self.assertTrue((series.time_index[-2:] == forecasts_neg.time_index).all()) # positive index forecasts_pos = model.historical_forecasts( - series=series, start=8, start_format="positional_index", retrain=False + series=series, start=8, start_format="position", retrain=False ) self.assertEqual(forecasts_pos, forecasts_neg) @@ -403,14 +403,14 @@ def test_historical_forecasts_negative_rangeindex(self): # start as point forecasts = model.historical_forecasts( - series=series, start=-2, start_format="value_index", retrain=False + series=series, start=-2, start_format="value", retrain=False ) self.assertEqual(len(forecasts), 7) self.assertTrue((series.time_index[-7:] == forecasts.time_index).all()) # start as index forecasts = model.historical_forecasts( - series=series, start=-2, start_format="positional_index", retrain=False + series=series, start=-2, start_format="position", retrain=False ) self.assertEqual(len(forecasts), 2) self.assertTrue((series.time_index[-2:] == forecasts.time_index).all()) @@ -650,27 +650,27 @@ def test_sanity_check_invalid_start(self): # positional_index, predicting only the last position LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=9, start_format="positional_index" + timeidx_, start=9, start_format="position" ) # positional_index, predicting from the first position with retrain=True with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=-10, 
start_format="positional_index" + timeidx_, start=-10, start_format="position" ) assert str(msg.value).endswith(", resulting in an empty training set.") # positional_index, beyond boundaries with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=10, start_format="positional_index" + timeidx_, start=10, start_format="position" ) assert str(msg.value).startswith( "`start` index `10` is out of bounds for series of length 10" ) with pytest.raises(ValueError) as msg: LinearRegressionModel(lags=1).historical_forecasts( - timeidx_, start=-11, start_format="positional_index" + timeidx_, start=-11, start_format="position" ) assert str(msg.value).startswith( "`start` index `-11` is out of bounds for series of length 10" diff --git a/darts/utils/historical_forecasts/optimized_historical_forecasts.py b/darts/utils/historical_forecasts/optimized_historical_forecasts.py index 661541a833..50c1364f26 100644 --- a/darts/utils/historical_forecasts/optimized_historical_forecasts.py +++ b/darts/utils/historical_forecasts/optimized_historical_forecasts.py @@ -25,7 +25,7 @@ def _optimized_historical_forecasts_regression_last_points_only( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["position", "value"] = "value", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, @@ -163,7 +163,7 @@ def _optimized_historical_forecasts_regression_all_points( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["position", "value"] = "value", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, diff --git 
a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py index b06fc2db7f..173c27275d 100644 --- a/darts/utils/historical_forecasts/utils.py +++ b/darts/utils/historical_forecasts/utils.py @@ -68,10 +68,10 @@ def _historical_forecasts_general_checks(model, series, kwargs): logger, ) - if n.start_format == "positional_index": + if n.start_format == "position": raise_if_not( isinstance(n.start, (int, np.int64)), - f"Since `start_format='index'`, `start` must be an integer, received {type(n.start)}", + f"Since `start_format='position'`, `start` must be an integer, received {type(n.start)}", logger, ) @@ -108,7 +108,7 @@ def _historical_forecasts_general_checks(model, series, kwargs): ) elif isinstance(n.start, (int, np.int64)): out_of_bound_error = False - if n.start_format == "positional_index": + if n.start_format == "position": if (n.start > 0 and n.start >= len(series_)) or ( n.start < 0 and np.abs(n.start) > len(series_) ): @@ -142,7 +142,7 @@ def _historical_forecasts_general_checks(model, series, kwargs): logger, ) - if n.start_format == "value_index": + if n.start_format == "value": start = series_.get_timestamp_at_point(n.start) else: start = series_.time_index[n.start] @@ -396,7 +396,7 @@ def _adjust_historical_forecasts_time_index( forecast_horizon: int, overlap_end: bool, start: Optional[Union[pd.Timestamp, float, int]], - start_format: Literal["positional_index", "value_index"], + start_format: Literal["position", "value"], show_warnings: bool, ) -> TimeIndex: """ @@ -417,7 +417,7 @@ def _adjust_historical_forecasts_time_index( # when applicable, shift the start of the forecastable index based on `start` if start is not None: - if start_format == "value_index": + if start_format == "value": start_time_ = series.get_timestamp_at_point(start) else: start_time_ = series.time_index[start] From b3d5929888b3619749b3373291fe9ba6e479a7ad Mon Sep 17 00:00:00 2001 From: madtoinou Date: Mon, 14 Aug 2023 12:26:00 +0200 Subject: [PATCH 
15/21] doc: updated start docstring --- darts/models/forecasting/forecasting_model.py | 44 ++++++------------- 1 file changed, 14 insertions(+), 30 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 16bb505fce..ed32092e81 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -615,15 +615,14 @@ def historical_forecasts( steps available, all steps up until prediction time are used, as in default case. Needs to be at least `min_train_series_length`. start - Optionally, the first point in time at which a prediction is computed for a future time. - This parameter supports: ``float``, ``int``, ``pandas.Timestamp``, and ``None``. - If a ``float``, the parameter will be treated as the proportion of the time series - that should lie before the first prediction point. - If an ``int``, the parameter will be treated as an integer index to the time index of - `series` that will be used as first prediction time. - If a ``pandas.Timestamp``, the time stamp will be used to determine the first prediction time - directly. - If ``None``, the first prediction time will automatically be set to: + Optionally, the first point in time at which a prediction is computed. This parameter supports: + ``float``, ``int``, ``pandas.Timestamp``, and ``None``. + If a ``float``, it is the proportion of the time series that should lie before the first prediction point. + If an ``int``, it is either the index position of the first prediction point for `series` with a + `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to + the index position with `start_format="position"`. + If a ``pandas.Timestamp``, it is the time stamp of the first prediction point. 
+ If ``None``, the first prediction point will automatically be set to: - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first predictable point is earlier than the first trainable point. @@ -1101,25 +1100,10 @@ def backtest( steps available, all steps up until prediction time are used, as in default case. Needs to be at least `min_train_series_length`. start - Optionally, the first point in time at which a prediction is computed for a future time. - This parameter supports: ``float``, ``int`` and ``pandas.Timestamp``, and ``None``. - If a ``float``, the parameter will be treated as the proportion of the time series - that should lie before the first prediction point. - If an ``int``, the parameter will be treated as an integer index to the time index of - `series` that will be used as first prediction time. - If a ``pandas.Timestamp``, the time stamp will be used to determine the first prediction time - directly. - If ``None``, the first prediction time will automatically be set to: - - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first - predictable point is earlier than the first trainable point. - - - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`), - or `retrain` is a Callable and the first trainable point is earlier than the first predictable point. - - - the first trainable point (given `train_length`) otherwise - Note: Raises a ValueError if `start` yields a time outside the time index of `series`. - Note: If `start` is outside the possible historical forecasting times, will ignore the parameter - (default behavior with ``None``) and start at the first trainable/predictable point. + Optionally, the first point in time at which a prediction is computed. This parameter supports: + ``float``, ``int``, ``pandas.Timestamp``, and ``None``. 
+ For a detailed description of how the different data types are interpreted, please see the documentation + for `ForecastingModel.historical_forecasts`. Only used in expanding window mode. start_format Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a `pd.RangeIndex`. For a detailed description this argument, please see the documentation for @@ -1299,8 +1283,8 @@ def gridsearch( stride The number of time steps between two consecutive predictions. Only used in expanding window mode. start - The ``int``, ``float`` or ``pandas.Timestamp`` that represents the starting point in the time index - of `series` from which predictions will be made to evaluate the model. + Optionally, the first point in time at which a prediction is computed. This parameter supports: + ``float``, ``int``, ``pandas.Timestamp``, and ``None``. For a detailed description of how the different data types are interpreted, please see the documentation for `ForecastingModel.historical_forecasts`. Only used in expanding window mode. 
start_format From 94842b046ad46ab9840a33baebd91d52797b798d Mon Sep 17 00:00:00 2001 From: madtoinou Date: Mon, 14 Aug 2023 13:04:19 +0200 Subject: [PATCH 16/21] test: limited the dependency on unittest in anticipation of the refactoring --- .../forecasting/test_historical_forecasts.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/darts/tests/models/forecasting/test_historical_forecasts.py b/darts/tests/models/forecasting/test_historical_forecasts.py index 9c99a0fa74..6258462077 100644 --- a/darts/tests/models/forecasting/test_historical_forecasts.py +++ b/darts/tests/models/forecasting/test_historical_forecasts.py @@ -384,14 +384,14 @@ def test_historical_forecasts_position_start(self): forecasts_neg = model.historical_forecasts( series=series, start=-2, start_format="position", retrain=False ) - self.assertEqual(len(forecasts_neg), 2) - self.assertTrue((series.time_index[-2:] == forecasts_neg.time_index).all()) + assert len(forecasts_neg) == 2 + assert (series.time_index[-2:] == forecasts_neg.time_index).all() # positive index forecasts_pos = model.historical_forecasts( series=series, start=8, start_format="position", retrain=False ) - self.assertEqual(forecasts_pos, forecasts_neg) + assert forecasts_pos == forecasts_neg def test_historical_forecasts_negative_rangeindex(self): series = TimeSeries.from_times_and_values( @@ -405,15 +405,15 @@ def test_historical_forecasts_negative_rangeindex(self): forecasts = model.historical_forecasts( series=series, start=-2, start_format="value", retrain=False ) - self.assertEqual(len(forecasts), 7) - self.assertTrue((series.time_index[-7:] == forecasts.time_index).all()) + assert len(forecasts) == 7 + assert (series.time_index[-7:] == forecasts.time_index).all() # start as index forecasts = model.historical_forecasts( series=series, start=-2, start_format="position", retrain=False ) - self.assertEqual(len(forecasts), 2) - self.assertTrue((series.time_index[-2:] == forecasts.time_index).all()) 
+ assert len(forecasts) == 2 + assert (series.time_index[-2:] == forecasts.time_index).all() def test_historical_forecasts(self): train_length = 10 From f6f95bd6219b0ba11a2bddaec75709129c0d37f4 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Mon, 14 Aug 2023 13:08:19 +0200 Subject: [PATCH 17/21] doc: updated changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb05974336..eee112bfc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ but cannot always guarantee backwards compatibility. Changes that may **break co **Improvement** - `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). -- Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch` so that integer `start` can be used a position or a value of the time index. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). +- Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch`; an integer `start` can be used as a `RangeIndex` position (previously, had to be able of the index). [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). **Fixed** - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou). 
From 35bf0969fccf9ab0f56a28d02c66e4668b76daf4 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Mon, 14 Aug 2023 13:10:21 +0200 Subject: [PATCH 18/21] fix: fixed typo --- darts/models/forecasting/forecasting_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index ed32092e81..ac99484e00 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1904,7 +1904,7 @@ def _optimized_historical_forecasts( future_covariates: Optional[Sequence[TimeSeries]] = None, num_samples: int = 1, start: Optional[Union[pd.Timestamp, float, int]] = None, - start_format: Literal["positional_index", "value_index"] = "value_index", + start_format: Literal["position", "value"] = "value", forecast_horizon: int = 1, stride: int = 1, overlap_end: bool = False, From ba1393431905525ed6b7a14d49f711992bc4d67e Mon Sep 17 00:00:00 2001 From: madtoinou Date: Mon, 14 Aug 2023 13:11:38 +0200 Subject: [PATCH 19/21] fix: fixed typo --- darts/utils/historical_forecasts/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/darts/utils/historical_forecasts/utils.py b/darts/utils/historical_forecasts/utils.py index 173c27275d..ee35d4fd32 100644 --- a/darts/utils/historical_forecasts/utils.py +++ b/darts/utils/historical_forecasts/utils.py @@ -575,7 +575,7 @@ def _get_historical_forecast_boundaries( past_covariates: Optional[TimeSeries], future_covariates: Optional[TimeSeries], start: Optional[Union[pd.Timestamp, float, int]], - start_format: Literal["positional_index", "value_index"], + start_format: Literal["position", "value"], forecast_horizon: int, overlap_end: bool, freq: pd.DateOffset, From 6a91897ab1ba939983e5f78838b35e2e62992701 Mon Sep 17 00:00:00 2001 From: madtoinou Date: Tue, 15 Aug 2023 09:49:10 +0200 Subject: [PATCH 20/21] doc: copy start and start_format docstring from hist_fct to backtest and gridsearch 
--- darts/models/forecasting/forecasting_model.py | 64 ++++++++++++++----- 1 file changed, 49 insertions(+), 15 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index ac99484e00..452d2368cd 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -1102,12 +1102,29 @@ def backtest( start Optionally, the first point in time at which a prediction is computed. This parameter supports: ``float``, ``int``, ``pandas.Timestamp``, and ``None``. - For a detailed description of how the different data types are interpreted, please see the documentation - for `ForecastingModel.historical_forecasts`. Only used in expanding window mode. + If a ``float``, it is the proportion of the time series that should lie before the first prediction point. + If an ``int``, it is either the index position of the first prediction point for `series` with a + `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to + the index position with `start_format="position"`. + If a ``pandas.Timestamp``, it is the time stamp of the first prediction point. + If ``None``, the first prediction point will automatically be set to: + + - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first + predictable point is earlier than the first trainable point. + - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`), + or `retrain` is a Callable and the first trainable point is earlier than the first predictable point. + - the first trainable point (given `train_length`) otherwise + + Note: Raises a ValueError if `start` yields a time outside the time index of `series`. + Note: If `start` is outside the possible historical forecasting times, will ignore the parameter + (default behavior with ``None``) and start at the first trainable/predictable point. 
start_format Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a - `pd.RangeIndex`. For a detailed description this argument, please see the documentation for - `ForecastingModel.historical_forecasts`. + `pd.RangeIndex`. + If set to 'position', `start` corresponds to the index position of the first predicted point and can range + from `(-len(series), len(series) - 1)`. + If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise + an error if the value is not in `series`' index. Default: ``'value'`` forecast_horizon The forecast horizon for the point predictions. stride @@ -1281,21 +1298,38 @@ def gridsearch( forecast_horizon The integer value of the forecasting horizon. Activates expanding window mode. stride - The number of time steps between two consecutive predictions. Only used in expanding window mode. + Only used in expanding window mode. The number of time steps between two consecutive predictions. start - Optionally, the first point in time at which a prediction is computed. This parameter supports: - ``float``, ``int``, ``pandas.Timestamp``, and ``None``. - For a detailed description of how the different data types are interpreted, please see the documentation - for `ForecastingModel.historical_forecasts`. Only used in expanding window mode. + Only used in expanding window mode. Optionally, the first point in time at which a prediction is computed. + This parameter supports: ``float``, ``int``, ``pandas.Timestamp``, and ``None``. + If a ``float``, it is the proportion of the time series that should lie before the first prediction point. + If an ``int``, it is either the index position of the first prediction point for `series` with a + `pd.DatetimeIndex`, or the index value for `series` with a `pd.RangeIndex`. The latter can be changed to + the index position with `start_format="position"`. 
+ If a ``pandas.Timestamp``, it is the time stamp of the first prediction point. + If ``None``, the first prediction point will automatically be set to: + + - the first predictable point if `retrain` is ``False``, or `retrain` is a Callable and the first + predictable point is earlier than the first trainable point. + - the first trainable point if `retrain` is ``True`` or ``int`` (given `train_length`), + or `retrain` is a Callable and the first trainable point is earlier than the first predictable point. + - the first trainable point (given `train_length`) otherwise + + Note: Raises a ValueError if `start` yields a time outside the time index of `series`. + Note: If `start` is outside the possible historical forecasting times, will ignore the parameter + (default behavior with ``None``) and start at the first trainable/predictable point. start_format - Defines the `start` format. Only effective when `start` is an integer and `series` is indexed with a - `pd.RangeIndex`. For a detailed description this argument, please see the documentation for - `ForecastingModel.historical_forecasts`. + Only used in expanding window mode. Defines the `start` format. Only effective when `start` is an integer + and `series` is indexed with a `pd.RangeIndex`. + If set to 'position', `start` corresponds to the index position of the first predicted point and can range + from `(-len(series), len(series) - 1)`. + If set to 'value', `start` corresponds to the index value/label of the first predicted point. Will raise + an error if the value is not in `series`' index. Default: ``'value'`` last_points_only - Whether to use the whole forecasts or only the last point of each forecast to compute the error. Only used - in expanding window mode. + Only used in expanding window mode. Whether to use the whole forecasts or only the last point of each + forecast to compute the error. show_warnings - Whether to show warnings related to the `start` parameter. Only used in expanding window mode. 
+ Only used in expanding window mode. Whether to show warnings related to the `start` parameter. val_series The TimeSeries instance used for validation in split mode. If provided, this series must start right after the end of `series`; so that a proper comparison of the forecast can be made. From 462c51a88b14df3c4f5c7e4160d8f5497ae37141 Mon Sep 17 00:00:00 2001 From: madtoinou <32447896+madtoinou@users.noreply.github.com> Date: Tue, 15 Aug 2023 09:49:59 +0200 Subject: [PATCH 21/21] Apply suggestions from code review Co-authored-by: Dennis Bader --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eee112bfc2..779e59ffc8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,9 @@ but cannot always guarantee backwards compatibility. Changes that may **break co ### For users of the library: -**Improvement** +**Improved** - `TimeSeries` with a `RangeIndex` starting in the negative start are now supported by `historical_forecasts`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). -- Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch`; an integer `start` can be used as a `RangeIndex` position (previously, had to be able of the index). [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). +- Added a new argument `start_format` to `historical_forecasts()`, `backtest()` and `gridsearch()` that allows using an integer `start` either as the index position or index value/label for `series` indexed with a `pd.RangeIndex`. [#1866](https://github.com/unit8co/darts/pull/1866) by [Antoine Madrona](https://github.com/madtoinou). **Fixed** - Fixed a bug in `TimeSeries.from_dataframe()` when using a pandas.DataFrame with `df.columns.name != None`. [#1938](https://github.com/unit8co/darts/pull/1938) by [Antoine Madrona](https://github.com/madtoinou).