5
5
A forecasting model using a linear regression of some of the target series' lags, as well as optionally some
6
6
covariate series' lags in order to obtain a forecast.
7
7
"""
8
- from typing import List , Tuple , Union
8
+ from typing import List , Optional , Sequence , Tuple , Union
9
9
10
- from sklearn .linear_model import LinearRegression
10
+ import numpy as np
11
+ from scipy .optimize import linprog
12
+ from sklearn .linear_model import LinearRegression , PoissonRegressor , QuantileRegressor
11
13
12
14
from darts .logging import get_logger
13
- from darts .models .forecasting .regression_model import RegressionModel
15
+ from darts .models .forecasting .regression_model import RegressionModel , _LikelihoodMixin
16
+ from darts .timeseries import TimeSeries
14
17
15
18
logger = get_logger (__name__ )
16
19
17
20
18
- class LinearRegressionModel (RegressionModel ):
21
+ class LinearRegressionModel (RegressionModel , _LikelihoodMixin ):
19
22
def __init__(
    self,
    lags: Union[int, list] = None,
    lags_past_covariates: Union[int, List[int]] = None,
    lags_future_covariates: Union[Tuple[int, int], List[int]] = None,
    output_chunk_length: int = 1,
    likelihood: Optional[str] = None,
    quantiles: Optional[List[float]] = None,
    random_state: Optional[int] = None,
    **kwargs,
):
    """Linear regression model.

    Parameters
    ----------
    lags
        Lags of the target series used as regression inputs. Forwarded unchanged to `RegressionModel`;
        see that class for the accepted formats.
    lags_past_covariates
        Lags of the past covariates used as regression inputs. Forwarded unchanged to `RegressionModel`.
    lags_future_covariates
        Lags of the future covariates used as regression inputs. Forwarded unchanged to `RegressionModel`.
    output_chunk_length
        Number of time steps predicted at once by the internal regression model. Does not have to equal the forecast
        horizon `n` used in `predict()`. However, setting `output_chunk_length` equal to the forecast horizon may
        be useful if the covariates don't extend far enough into the future.
    likelihood
        Can be set to `quantile` or `poisson`. If set, the model will be probabilistic, allowing sampling at
        prediction time. If set to `quantile`, the `sklearn.linear_model.QuantileRegressor` is used. Similarly, if
        set to `poisson`, the `sklearn.linear_model.PoissonRegressor` is used.
    quantiles
        Fit the model to these quantiles if the `likelihood` is set to `quantile`.
    random_state
        Control the randomness of the sampling. Used as seed for
        `link <https://numpy.org/doc/stable/reference/random/generator.html#numpy.random.Generator>`_ . Ignored when
        no `likelihood` is set.
        Default: ``None``.
    **kwargs
        Additional keyword arguments passed to `sklearn.linear_model.LinearRegression` (by default), to
        `sklearn.linear_model.PoissonRegressor` (if `likelihood="poisson"`), or to
        `sklearn.linear_model.QuantileRegressor` (if `likelihood="quantile"`).
    """
    # Kept on the instance because `fit()` re-creates one QuantileRegressor per quantile from it.
    self.kwargs = kwargs
    self._median_idx = None
    self._model_container = None
    self.quantiles = None
    self.likelihood = likelihood
    self._rng = None

    # parse likelihood
    available_likelihoods = ["quantile", "poisson"]  # to be extended
    if likelihood is not None:
        self._check_likelihood(likelihood, available_likelihoods)
        self._rng = np.random.default_rng(seed=random_state)

    # The branches are mutually exclusive. With two independent `if`s the trailing `else`
    # would replace the PoissonRegressor by a plain LinearRegression when likelihood="poisson".
    if likelihood == "poisson":
        model = PoissonRegressor(**kwargs)
    elif likelihood == "quantile":
        model = QuantileRegressor(**kwargs)
        self.quantiles, self._median_idx = self._prepare_quantiles(quantiles)
        self._model_container = self._get_model_container()
    else:
        model = LinearRegression(**kwargs)

    super().__init__(
        lags=lags,
        lags_past_covariates=lags_past_covariates,
        lags_future_covariates=lags_future_covariates,
        output_chunk_length=output_chunk_length,
        model=model,
    )
58
98
59
99
def __str__(self):
    """Return a short description of the model that shows its configured lags."""
    lags = self.lags
    return f"LinearRegression(lags={lags})"
101
+
102
def fit(
    self,
    series: Union[TimeSeries, Sequence[TimeSeries]],
    past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    max_samples_per_ts: Optional[int] = None,
    n_jobs_multioutput_wrapper: Optional[int] = None,
    **kwargs,
):
    """
    Fit/train the model on one or multiple series.

    With `likelihood="quantile"` one `QuantileRegressor` is fitted per configured quantile and the
    fitted models are stored in the internal model container, keyed by quantile. Otherwise the single
    underlying model is fitted once.

    Parameters
    ----------
    series
        TimeSeries or Sequence[TimeSeries] object containing the target values.
    past_covariates
        Optionally, a series or sequence of series specifying past-observed covariates
    future_covariates
        Optionally, a series or sequence of series specifying future-known covariates
    max_samples_per_ts
        This is an integer upper bound on the number of tuples that can be produced
        per time series. It can be used in order to have an upper bound on the total size of the dataset and
        ensure proper sampling. If `None`, it will read all of the individual time series in advance (at dataset
        creation) to know their sizes, which might be expensive on big datasets.
        If some series turn out to have a length that would allow more than `max_samples_per_ts`, only the
        most recent `max_samples_per_ts` samples will be considered.
    n_jobs_multioutput_wrapper
        Number of jobs of the MultiOutputRegressor wrapper to run in parallel. Only used if the model doesn't
        support multi-output regression natively. Forwarded to the parent `fit` only when explicitly set.
    **kwargs
        Additional keyword arguments passed to the `fit` method of the model.

    Returns
    -------
    self
        The fitted model instance.
    """
    # Arguments shared by every call to the parent `fit`.
    fit_args = dict(
        series=series,
        past_covariates=past_covariates,
        future_covariates=future_covariates,
        max_samples_per_ts=max_samples_per_ts,
    )
    # Previously this argument was accepted but silently dropped. Forward it only when the
    # caller actually set it, so the default call to the parent stays exactly as before.
    if n_jobs_multioutput_wrapper is not None:
        fit_args["n_jobs_multioutput_wrapper"] = n_jobs_multioutput_wrapper

    if self.likelihood != "quantile":
        super().fit(**fit_args, **kwargs)
        return self

    # empty model container in case of multiple calls to fit, e.g. when backtesting
    self._model_container.clear()

    # set default fast solver for the linear program unless the user chose one
    self.kwargs.setdefault("solver", "highs")

    # test solver availability with dummy problem
    try:
        linprog(c=[1], method=self.kwargs["solver"])
    except ValueError as ve:
        logger.warning(
            f"{ve}. Upgrading scipy enables significantly faster solvers"
        )
        # set solver to slow legacy
        self.kwargs["solver"] = "interior-point"

    for quantile in self.quantiles:
        # A fresh regressor per quantile; the parent `fit` trains whatever `self.model` holds.
        self.kwargs["quantile"] = quantile
        self.model = QuantileRegressor(**self.kwargs)
        super().fit(**fit_args, **kwargs)

        # Store the model as left by the parent `fit` so `predict()` can reuse it.
        self._model_container[quantile] = self.model

    return self
181
+
182
def predict(
    self,
    n: int,
    series: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    past_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    future_covariates: Optional[Union[TimeSeries, Sequence[TimeSeries]]] = None,
    num_samples: int = 1,
    **kwargs,
) -> Union[TimeSeries, Sequence[TimeSeries]]:
    """Forecasts values for `n` time steps after the end of the series.

    With `likelihood="quantile"` every fitted per-quantile model produces a forecast and samples are
    drawn from the combined quantile forecasts. With `likelihood="poisson"` samples are drawn from
    the single model's forecast. Without a likelihood the call is delegated to the parent `predict`.

    Parameters
    ----------
    n : int
        Forecast horizon - the number of time steps after the end of the series for which to produce predictions.
    series : TimeSeries or list of TimeSeries, optional
        Optionally, one or several input `TimeSeries`, representing the history of the target series whose future
        is to be predicted. If specified, the method returns the forecasts of these series. Otherwise, the method
        returns the forecast of the (single) training series.
    past_covariates : TimeSeries or list of TimeSeries, optional
        Optionally, the past-observed covariates series needed as inputs for the model.
        They must match the covariates used for training in terms of dimension and type.
    future_covariates : TimeSeries or list of TimeSeries, optional
        Optionally, the future-known covariates series needed as inputs for the model.
        They must match the covariates used for training in terms of dimension and type.
    num_samples : int, default: 1
        Specifies the number of samples to obtain from the model. Should be set to 1 if no `likelihood` is specified.
    **kwargs : dict, optional
        Additional keyword arguments passed to the `predict` method of the model. Only works with
        univariate target series.
    """
    mode = self.likelihood

    if mode == "quantile":
        per_quantile_values = []
        for _, quantile_model in self._model_container.items():
            # The parent `predict` uses whatever `self.model` currently holds.
            self.model = quantile_model
            prediction = super().predict(
                n, series, past_covariates, future_covariates, **kwargs
            )
            per_quantile_values.append(prediction.all_values(copy=False))

        # Join the per-quantile forecasts along the last axis before sampling.
        stacked = np.concatenate(per_quantile_values, axis=-1)

        # build timeseries from samples, using the last prediction as template
        return self._ts_like(
            prediction, self._sample_quantiles(stacked, num_samples)
        )

    if mode == "poisson":
        prediction = super().predict(
            n, series, past_covariates, future_covariates, **kwargs
        )
        forecast_values = np.array(prediction.all_values(copy=False))

        # build timeseries from samples
        return self._ts_like(
            prediction, self._sample_poisson(forecast_values, num_samples)
        )

    # No likelihood configured: defer entirely to the parent implementation.
    return super().predict(
        n, series, past_covariates, future_covariates, num_samples, **kwargs
    )
0 commit comments