Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/lagged features names #1679

Merged
merged 19 commits into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
e7efa59
feat: create and store the lagged features names in the regression mo…
madtoinou Mar 28, 2023
cc77936
feat: adding corresponding tests in tabularization
madtoinou Mar 28, 2023
dbc942b
fix: support any kind of Sequence to generate the lagged features name
madtoinou Mar 29, 2023
10060b7
feat: verify that the number of lagged feature names matches the feat…
madtoinou Mar 29, 2023
69a3819
fix: if any of the variate is a sequence of ts with different compone…
madtoinou Apr 5, 2023
b798c50
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 5, 2023
f48ea8a
fix: using the same naming convention for the lagged components as th…
madtoinou Apr 6, 2023
9fa93cb
refactor and fix some type hint warnings
dennisbader Apr 6, 2023
89dbb47
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 7, 2023
47e4214
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 8, 2023
b496881
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 10, 2023
8d2a03e
Merge branch 'master' into feat/lagged_features_names
madtoinou Apr 10, 2023
1b624f5
Merge branch 'feat/lagged_features_names' of https://github.com/unit8…
dennisbader Apr 11, 2023
76ee1c8
Merge branch 'master' into feat/lagged_features_names
dennisbader Apr 11, 2023
38c10a0
simplified lagged feature name generation and moved out of regression…
dennisbader Apr 11, 2023
1325983
fix regr model tests
dennisbader Apr 11, 2023
dd3798b
fix create lagged data tests
dennisbader Apr 11, 2023
557dbfe
fix small bug in unit test
dennisbader Apr 11, 2023
05428e5
fix bug in unittest from last PR
dennisbader Apr 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 73 additions & 1 deletion darts/models/forecasting/regression_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@
from darts.logging import get_logger, raise_if, raise_if_not, raise_log
from darts.models.forecasting.forecasting_model import GlobalForecastingModel
from darts.timeseries import TimeSeries
from darts.utils.data.tabularization import create_lagged_training_data
from darts.utils.data.tabularization import (
create_lagged_components_names,
create_lagged_training_data,
)
from darts.utils.multioutput import MultiOutputRegressor
from darts.utils.utils import (
_check_quantiles,
Expand Down Expand Up @@ -358,6 +361,32 @@ def _create_lagged_data(

return training_samples, training_labels

def _create_lagged_components_name(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can remove this method and put everything into the helper function create_lagged_component_names

self, target_series, past_covariates, future_covariates
):
lags = self.lags.get("target")
lags_past_covariates = self.lags.get("past")
lags_future_covariates = self.lags.get("future")

features_cols_name, labels_cols_name = create_lagged_components_names(
target_series=target_series,
past_covariates=past_covariates,
future_covariates=future_covariates,
lags=lags,
lags_past_covariates=lags_past_covariates,
lags_future_covariates=lags_future_covariates,
output_chunk_length=self.output_chunk_length,
concatenate=False,
)

# adding the static covariates on the right of each features_cols_name
features_cols_name = self._add_static_covariates_name(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can move this into the helper function create_lagged_component_names

features_cols_name,
target_series,
)

return features_cols_name, labels_cols_name

def _add_static_covariates(
self,
features: Union[np.array, Sequence[np.array]],
Expand Down Expand Up @@ -445,6 +474,41 @@ def _add_static_covariates(
features = features[0]
return features

def _add_static_covariates_name(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should be part of create_lagged_component_names in my opinion

self,
features_cols_name: List[List[str]],
target_series: Union[TimeSeries, Sequence[TimeSeries]],
) -> Union[np.array, Sequence[np.array]]:
"""
Add static covariates names to the features name for RegressionModels.
Accounts for series with potentially different static covariates to accommodate for the maximum
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the number of static covariates guaranteed to be identical? The models should throw an error when using series with different numbers of static covariates, no?

number of available static_covariates in any of the given series in the sequence.

Parameters
----------
features_cols_name
The name of the features of the numpy array(s) to which the static covariates will be added, generated with
`create_lagged_components_names()`
target_series
The target series from which to read the static covariates.

Returns
-------
features_cols_name
The features' name list with appended static covariates names on the right.
"""
target_series = series2seq(target_series)

# collect static covariates info, preserve the order
static_covs_names = []
for ts in target_series:
if ts.has_static_covariates:
for static_cov_name in ts.static_covariates.keys():
if static_cov_name not in static_covs_names:
static_covs_names.append(static_cov_name)

return features_cols_name + static_covs_names

def _fit_model(
self,
target_series,
Expand All @@ -470,6 +534,14 @@ def _fit_model(
training_labels = training_labels.ravel()
self.model.fit(training_samples, training_labels, **kwargs)

# generate and store the lagged components names (for feature importance analysis)
lagged_features_names, _ = self._create_lagged_components_name(
target_series=target_series,
past_covariates=past_covariates,
future_covariates=future_covariates,
)
self.model.lagged_features_name_ = lagged_features_names
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's use a naming convention lagged_feature_names similar to feature_importances_ from sklearn.

Also, shouldn't we store this in the Darts model, rather than in the actual (wrapped) one?
That would also require defining it in the model constructor.

Suggested change
self.model.lagged_features_name_ = lagged_features_names
self.lagged_feature_names_ = lagged_feature_names


def fit(
self,
series: Union[TimeSeries, Sequence[TimeSeries]],
Expand Down
29 changes: 28 additions & 1 deletion darts/tests/models/forecasting/test_regression_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -960,7 +960,7 @@ def test_static_cov_appended_values(self):
)
# Take only last sample:
expected_X_pred = expected_X_pred[-1, :].reshape(1, -1)
# Number of static covss to add = difference in width between feature
# Number of static covs to add = difference in width between feature
# matrix *with* static covs and feature matrix *without* static covs:
scov_width = expected_X.shape[1] - expected_X_pred.shape[1]
zeros_scovs = np.zeros((1, scov_width))
Expand Down Expand Up @@ -1156,13 +1156,40 @@ def test_static_cov_accuracy(self):
pred_no_static_cov = model_no_static_cov.predict(
n=period, series=fitting_series
)
# multiple series with different component names ("smooth" and "irregular")
# trigger the creation of generic feature names
expected_features_in = [
f"comp0_target_lag{str(-i)}" for i in range(period // 2, 0, -1)
]

self.assertEqual(
model_no_static_cov.model.lagged_features_name_, expected_features_in
)
self.assertEqual(
len(model_no_static_cov.model.feature_importances_),
len(expected_features_in),
)

fitting_series = [
train_series_static_cov[0][: (60 - period)],
train_series_static_cov[1][:60],
]
model_static_cov = RandomForest(lags=period // 2, bootstrap=False)
model_static_cov.fit(fitting_series)

# multiple univariate series with different names but the same static covariate
expected_features_in = [
f"comp0_target_lag{str(-i)}" for i in range(period // 2, 0, -1)
] + ["curve_type"]

self.assertEqual(
model_static_cov.model.lagged_features_name_, expected_features_in
)
self.assertEqual(
len(model_static_cov.model.feature_importances_),
len(expected_features_in),
)

pred_static_cov = model_static_cov.predict(n=period, series=fitting_series)

# then
Expand Down
Loading