Skip to content

Commit f635da0

Browse files
madtoinou and dennisbader
authored and committed
Feat/lagged features names (unit8co#1679)
* feat: create and store the lagged features names in the regression models
* feat: adding corresponding tests in tabularization
* fix: support any kind of Sequence to generate the lagged features name
* feat: verify that the number of lagged feature names matches the feature_importances in the relevant regression models
* fix: if any of the variate is a sequence of ts with different components names, create generic name for the corresponding variate, updated the tests
* fix: using the same naming convention for the lagged components as the explainability module
* refactor and fix some type hint warnings
* simplified lagged feature name generation and moved out of regression model
* fix regr model tests
* fix create lagged data tests
* fix small bug in unit test
* fix bug in unittest from last PR
---------
Co-authored-by: dennisbader <[email protected]>
1 parent bb74a91 commit f635da0

File tree

5 files changed

+500
-20
lines changed

5 files changed

+500
-20
lines changed

darts/models/forecasting/regression_model.py

+33
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from darts.timeseries import TimeSeries
3838
from darts.utils.data.tabularization import (
3939
add_static_covariates_to_lagged_data,
40+
create_lagged_component_names,
4041
create_lagged_training_data,
4142
)
4243
from darts.utils.multioutput import MultiOutputRegressor
@@ -136,6 +137,7 @@ def __init__(
136137
self.multi_models = multi_models
137138
self._considers_static_covariates = use_static_covariates
138139
self._static_covariates_shape: Optional[Tuple[int, int]] = None
140+
self._lagged_feature_names: Optional[List[str]] = None
139141

140142
# model checks
141143
if self.model is None:
@@ -407,6 +409,19 @@ def _fit_model(
407409
training_labels = training_labels.ravel()
408410
self.model.fit(training_samples, training_labels, **kwargs)
409411

412+
# generate and store the lagged components names (for feature importance analysis)
413+
self._lagged_feature_names, _ = create_lagged_component_names(
414+
target_series=target_series,
415+
past_covariates=past_covariates,
416+
future_covariates=future_covariates,
417+
lags=self.lags.get("target"),
418+
lags_past_covariates=self.lags.get("past"),
419+
lags_future_covariates=self.lags.get("future"),
420+
output_chunk_length=self.output_chunk_length,
421+
concatenate=False,
422+
use_static_covariates=self.uses_static_covariates,
423+
)
424+
410425
def fit(
411426
self,
412427
series: Union[TimeSeries, Sequence[TimeSeries]],
@@ -766,6 +781,24 @@ def _predict_and_sample(
766781

767782
return prediction.reshape(k, self.pred_dim, -1)
768783

784+
@property
785+
def lagged_feature_names(self) -> Optional[List[str]]:
786+
"""The lagged feature names the model has been trained on.
787+
788+
The naming convention for target, past and future covariates is: ``"{name}_{type}_lag{i}"``, where:
789+
790+
- ``{name}`` the component name of the (first) series
791+
- ``{type}`` is the feature type, one of "target", "pastcov", and "futcov"
792+
- ``{i}`` is the lag value
793+
794+
The naming convention for static covariates is: ``"{name}_statcov_target_{comp}"``, where:
795+
796+
- ``{name}`` the static covariate name of the (first) series
797+
- ``{comp}`` the target component name of the (first) series that the static covariate acts on. If the static
798+
covariate acts globally on a multivariate target series, will show "global".
799+
"""
800+
return self._lagged_feature_names
801+
769802
def __str__(self):
770803
return self.model.__str__()
771804

darts/tests/models/forecasting/test_regression_models.py

+32-5
Original file line numberDiff line numberDiff line change
@@ -772,17 +772,23 @@ def test_optional_static_covariates(self):
772772
assert not model.uses_static_covariates
773773
assert model._static_covariates_shape is None
774774
preds = model.predict(n=2, series=series)
775-
assert preds.static_covariates.equals(series.static_covariates)
775+
# there seem to be some dtype issues with python=3.7
776+
np.testing.assert_almost_equal(
777+
preds.static_covariates.values,
778+
series.static_covariates.values,
779+
)
776780

777781
# with `use_static_covariates=True`, static covariates are included
778782
model = model_cls(lags=4, use_static_covariates=True)
779783
model.fit([series, series])
780784
assert model.uses_static_covariates
781785
assert model._static_covariates_shape == series.static_covariates.shape
782786
preds = model.predict(n=2, series=[series, series])
783-
assert all(
784-
[p.static_covariates.equals(series.static_covariates) for p in preds]
785-
)
787+
for pred in preds:
788+
np.testing.assert_almost_equal(
789+
pred.static_covariates.values,
790+
series.static_covariates.values,
791+
)
786792

787793
def test_static_cov_accuracy(self):
788794
"""
@@ -834,7 +840,6 @@ def test_static_cov_accuracy(self):
834840
model_static_cov = RandomForest(lags=period // 2, bootstrap=False)
835841
model_static_cov.fit(fitting_series)
836842
pred_static_cov = model_static_cov.predict(n=period, series=fitting_series)
837-
838843
# then
839844
for series, ps_no_st, ps_st_cat in zip(
840845
train_series_static_cov, pred_no_static_cov, pred_static_cov
@@ -855,13 +860,35 @@ def test_static_cov_accuracy(self):
855860
pred_no_static_cov = model_no_static_cov.predict(
856861
n=period, series=fitting_series
857862
)
863+
# multiple series with different components names ("smooth" and "irregular"),
864+
# will take first target name
865+
expected_features_in = [
866+
f"smooth_target_lag{str(-i)}" for i in range(period // 2, 0, -1)
867+
]
868+
self.assertEqual(model_no_static_cov.lagged_feature_names, expected_features_in)
869+
self.assertEqual(
870+
len(model_no_static_cov.model.feature_importances_),
871+
len(expected_features_in),
872+
)
858873

859874
fitting_series = [
860875
train_series_static_cov[0][: (60 - period)],
861876
train_series_static_cov[1][:60],
862877
]
863878
model_static_cov = RandomForest(lags=period // 2, bootstrap=False)
864879
model_static_cov.fit(fitting_series)
880+
881+
# multiple univariate series with different names with same static cov, will take name of first series
882+
expected_features_in = [
883+
f"smooth_target_lag{str(-i)}" for i in range(period // 2, 0, -1)
884+
] + ["curve_type_statcov_target_smooth"]
885+
886+
self.assertEqual(model_static_cov.lagged_feature_names, expected_features_in)
887+
self.assertEqual(
888+
len(model_static_cov.model.feature_importances_),
889+
len(expected_features_in),
890+
)
891+
865892
pred_static_cov = model_static_cov.predict(n=period, series=fitting_series)
866893

867894
# then

darts/tests/utils/tabularization/test_add_static_covariates.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class AddStaticToLaggedDataTestCase(DartsBaseTestClass):
1717
pd.DataFrame({"a": [0.0], "b": [1.0]})
1818
)
1919
series_stcov_multivar = series.with_static_covariates(
20-
pd.DataFrame({"a": [0.0, 1.0], "b": [1.0, 2.0]})
20+
pd.DataFrame({"a": [0.0, 1.0], "b": [10.0, 20.0]})
2121
)
2222
features = np.empty(shape=(len(series), 2))
2323

@@ -104,6 +104,9 @@ def test_add_static_covs_train(self):
104104
)
105105
assert [features_.shape == expected_shape for features_ in features]
106106
assert last_shape == self.series_stcov_multivar.static_covariates.shape
107+
assert np.all(
108+
features[0][:, -sum(last_shape) :] == np.array([0.0, 1.0, 10.0, 20.0])
109+
)
107110

108111
def test_add_static_covs_predict(self):
109112
# predicting when `last_shape` other than `None`
@@ -179,3 +182,6 @@ def test_add_static_covs_predict(self):
179182
)
180183
assert [features_.shape == expected_shape for features_ in features]
181184
assert last_shape == self.series_stcov_multivar.static_covariates.shape
185+
assert np.all(
186+
features[0][:, -sum(last_shape) :] == np.array([0.0, 1.0, 10.0, 20.0])
187+
)

0 commit comments

Comments
 (0)