Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/lagged features names #1679

Merged
merged 19 commits into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
e7efa59
feat: create and store the lagged features names in the regression mo…
madtoinou Mar 28, 2023
cc77936
feat: adding corresponding tests in tabularization
madtoinou Mar 28, 2023
dbc942b
fix: support any kind of Sequence to generate the lagged features name
madtoinou Mar 29, 2023
10060b7
feat: verify that the number of lagged feature names matches the feat…
madtoinou Mar 29, 2023
69a3819
fix: if any of the variate is a sequence of ts with different compone…
madtoinou Apr 5, 2023
b798c50
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 5, 2023
f48ea8a
fix: using the same naming convention for the lagged components as th…
madtoinou Apr 6, 2023
9fa93cb
refactor and fix some type hint warnings
dennisbader Apr 6, 2023
89dbb47
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 7, 2023
47e4214
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 8, 2023
b496881
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 10, 2023
8d2a03e
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 10, 2023
1b624f5
Merge branch 'feat/lagged_features_names' of https://github.com/unit8…
dennisbader Apr 11, 2023
76ee1c8
Merge branch 'master' into feat/lagged_features_names
dennisbader Apr 11, 2023
38c10a0
simplified lagged feature name generation and moved out of regression…
dennisbader Apr 11, 2023
1325983
fix regr model tests
dennisbader Apr 11, 2023
dd3798b
fix create lagged data tests
dennisbader Apr 11, 2023
557dbfe
fix small bug in unit test
dennisbader Apr 11, 2023
05428e5
fix bug in unittest from last PR
dennisbader Apr 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from darts.timeseries import TimeSeries
from darts.utils.data.tabularization import (
add_static_covariates_to_lagged_data,
create_lagged_component_names,
create_lagged_training_data,
)
from darts.utils.multioutput import MultiOutputRegressor
Expand Down Expand Up @@ -136,6 +137,7 @@ def __init__(
self.multi_models = multi_models
self._considers_static_covariates = use_static_covariates
self._static_covariates_shape: Optional[Tuple[int, int]] = None
self._lagged_feature_names: Optional[List[str]] = None

# model checks
if self.model is None:
Expand Down Expand Up @@ -407,6 +409,19 @@ def _fit_model(
training_labels = training_labels.ravel()
self.model.fit(training_samples, training_labels, **kwargs)

# generate and store the lagged component names (for feature importance analysis)
self._lagged_feature_names, _ = create_lagged_component_names(
target_series=target_series,
past_covariates=past_covariates,
future_covariates=future_covariates,
lags=self.lags.get("target"),
lags_past_covariates=self.lags.get("past"),
lags_future_covariates=self.lags.get("future"),
output_chunk_length=self.output_chunk_length,
concatenate=False,
use_static_covariates=self.uses_static_covariates,
)

def fit(
self,
series: Union[TimeSeries, Sequence[TimeSeries]],
Expand Down Expand Up @@ -766,6 +781,24 @@ def _predict_and_sample(

return prediction.reshape(k, self.pred_dim, -1)

@property
def lagged_feature_names(self) -> Optional[List[str]]:
"""The lagged feature names the model has been trained on.

Initialized to ``None``; populated when the model is fitted.

The naming convention for target, past and future covariates is: ``"{name}_{type}_lag{i}"``, where:

- ``{name}`` is the component name of the (first) series
- ``{type}`` is the feature type, one of "target", "pastcov", and "futcov"
- ``{i}`` is the lag value

The naming convention for static covariates is: ``"{name}_statcov_target_{comp}"``, where:

- ``{name}`` is the static covariate name of the (first) series
- ``{comp}`` is the target component name of the (first) series that the static covariate acts on. If the static
covariate acts globally on a multivariate target series, ``{comp}`` is "global".
"""
return self._lagged_feature_names

def __str__(self):
return self.model.__str__()

Expand Down
37 changes: 32 additions & 5 deletions darts/tests/models/forecasting/test_regression_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -772,17 +772,23 @@ def test_optional_static_covariates(self):
assert not model.uses_static_covariates
assert model._static_covariates_shape is None
preds = model.predict(n=2, series=series)
assert preds.static_covariates.equals(series.static_covariates)
# there seem to be some dtype issues with python=3.7
np.testing.assert_almost_equal(
preds.static_covariates.values,
series.static_covariates.values,
)

# with `use_static_covariates=True`, static covariates are included
model = model_cls(lags=4, use_static_covariates=True)
model.fit([series, series])
assert model.uses_static_covariates
assert model._static_covariates_shape == series.static_covariates.shape
preds = model.predict(n=2, series=[series, series])
assert all(
[p.static_covariates.equals(series.static_covariates) for p in preds]
)
for pred in preds:
np.testing.assert_almost_equal(
pred.static_covariates.values,
series.static_covariates.values,
)

def test_static_cov_accuracy(self):
"""
Expand Down Expand Up @@ -834,7 +840,6 @@ def test_static_cov_accuracy(self):
model_static_cov = RandomForest(lags=period // 2, bootstrap=False)
model_static_cov.fit(fitting_series)
pred_static_cov = model_static_cov.predict(n=period, series=fitting_series)

# then
for series, ps_no_st, ps_st_cat in zip(
train_series_static_cov, pred_no_static_cov, pred_static_cov
Expand All @@ -855,13 +860,35 @@ def test_static_cov_accuracy(self):
pred_no_static_cov = model_no_static_cov.predict(
n=period, series=fitting_series
)
# multiple series with different components names ("smooth" and "irregular"),
# will take first target name
expected_features_in = [
f"smooth_target_lag{str(-i)}" for i in range(period // 2, 0, -1)
]
self.assertEqual(model_no_static_cov.lagged_feature_names, expected_features_in)
self.assertEqual(
len(model_no_static_cov.model.feature_importances_),
len(expected_features_in),
)

fitting_series = [
train_series_static_cov[0][: (60 - period)],
train_series_static_cov[1][:60],
]
model_static_cov = RandomForest(lags=period // 2, bootstrap=False)
model_static_cov.fit(fitting_series)

# multiple univariate series with different names but the same static covariate; the name of the first series is used
expected_features_in = [
f"smooth_target_lag{str(-i)}" for i in range(period // 2, 0, -1)
] + ["curve_type_statcov_target_smooth"]

self.assertEqual(model_static_cov.lagged_feature_names, expected_features_in)
self.assertEqual(
len(model_static_cov.model.feature_importances_),
len(expected_features_in),
)

pred_static_cov = model_static_cov.predict(n=period, series=fitting_series)

# then
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class AddStaticToLaggedDataTestCase(DartsBaseTestClass):
pd.DataFrame({"a": [0.0], "b": [1.0]})
)
series_stcov_multivar = series.with_static_covariates(
pd.DataFrame({"a": [0.0, 1.0], "b": [1.0, 2.0]})
pd.DataFrame({"a": [0.0, 1.0], "b": [10.0, 20.0]})
)
features = np.empty(shape=(len(series), 2))

Expand Down Expand Up @@ -104,6 +104,9 @@ def test_add_static_covs_train(self):
)
# asserting a non-empty list is always true; use all() so every shape is actually checked
assert all(features_.shape == expected_shape for features_ in features)
assert last_shape == self.series_stcov_multivar.static_covariates.shape
assert np.all(
features[0][:, -sum(last_shape) :] == np.array([0.0, 1.0, 10.0, 20.0])
)

def test_add_static_covs_predict(self):
# predicting when `last_shape` other than `None`
Expand Down Expand Up @@ -179,3 +182,6 @@ def test_add_static_covs_predict(self):
)
# asserting a non-empty list is always true; use all() so every shape is actually checked
assert all(features_.shape == expected_shape for features_ in features)
assert last_shape == self.series_stcov_multivar.static_covariates.shape
assert np.all(
features[0][:, -sum(last_shape) :] == np.array([0.0, 1.0, 10.0, 20.0])
)
Loading