From 2dd3e7b6b7fa85d31e1930e90be333ccafac752f Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 26 Jun 2022 16:39:31 +0200 Subject: [PATCH 01/16] categorical static covariate support for TFTModel --- darts/models/forecasting/tft_model.py | 149 +++++++++++++++++++--- darts/models/forecasting/tft_submodels.py | 121 ++++++++---------- 2 files changed, 184 insertions(+), 86 deletions(-) diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py index caa24845e6..246e12d54c 100644 --- a/darts/models/forecasting/tft_model.py +++ b/darts/models/forecasting/tft_model.py @@ -19,6 +19,7 @@ _GateAddNorm, _GatedResidualNetwork, _InterpretableMultiHeadAttention, + _MultiEmbedding, _VariableSelectionNetwork, ) from darts.models.forecasting.torch_forecasting_model import MixedCovariatesTorchModel @@ -43,14 +44,15 @@ def __init__( output_dim: Tuple[int, int], variables_meta: Dict[str, Dict[str, List[str]]], num_static_components: int, - hidden_size: Union[int, List[int]] = 16, - lstm_layers: int = 1, - num_attention_heads: int = 4, - full_attention: bool = False, - feed_forward: str = "GatedResidualNetwork", - hidden_continuous_size: int = 8, - dropout: float = 0.1, - add_relative_index: bool = False, + hidden_size: Union[int, List[int]], + lstm_layers: int, + num_attention_heads: int, + full_attention: bool, + feed_forward: str, + hidden_continuous_size: int, + categorical_embedding_sizes: Dict[str, Tuple[int, int]], + dropout: float, + add_relative_index: bool, **kwargs, ): @@ -81,7 +83,10 @@ def __init__( Set the feedforward network block. default `GatedResidualNetwork` or one of the glu variant. Defaults to `GatedResidualNetwork`. hidden_continuous_size : int - default for hidden size for processing continuous variables' + default for hidden size for processing continuous variables. + categorical_embedding_sizes : int + Embedding size for categorical static covariates. 
Only effective if the target series contains + categorical (non-numeric) static covariates. dropout : float Fraction of neurons affected by Dropout. add_relative_index : bool @@ -103,6 +108,7 @@ def __init__( self.num_static_components = num_static_components self.hidden_size = hidden_size self.hidden_continuous_size = hidden_continuous_size + self.categorical_embedding_sizes = categorical_embedding_sizes self.lstm_layers = lstm_layers self.num_attention_heads = num_attention_heads self.full_attention = full_attention @@ -123,23 +129,41 @@ def __init__( # _attn: Attention # # processing inputs + # embeddings + self.input_embeddings = _MultiEmbedding( + embedding_sizes=categorical_embedding_sizes, + variable_names=self.categorical_static_variables, + ) + # continuous variable processing self.prescalers_linear = { name: nn.Linear( - 1 if name not in self.static_variables else self.num_static_components, + 1 + if name not in self.numeric_static_variables + else self.num_static_components, self.hidden_continuous_size, ) for name in self.reals } + # static (categorical and numerical) variables static_input_sizes = { - name: self.hidden_continuous_size for name in self.static_variables + name: self.input_embeddings.output_size[name] + for name in self.categorical_static_variables } + static_input_sizes.update( + { + name: self.hidden_continuous_size + for name in self.numeric_static_variables + } + ) self.static_covariates_vsn = _VariableSelectionNetwork( input_sizes=static_input_sizes, hidden_size=self.hidden_size, - input_embedding_flags={}, # this would be required for categorical inputs + input_embedding_flags={ + name: True for name in self.categorical_static_variables + }, dropout=self.dropout, prescalers=self.prescalers_linear, single_variable_grns={}, @@ -158,7 +182,7 @@ def __init__( self.encoder_vsn = _VariableSelectionNetwork( input_sizes=encoder_input_sizes, hidden_size=self.hidden_size, - input_embedding_flags={}, # this would be required for categorical 
inputs + input_embedding_flags={}, # this would be required for non-static categorical inputs dropout=self.dropout, context_size=self.hidden_size, prescalers=self.prescalers_linear, @@ -168,7 +192,7 @@ def __init__( self.decoder_vsn = _VariableSelectionNetwork( input_sizes=decoder_input_sizes, hidden_size=self.hidden_size, - input_embedding_flags={}, # this would be required for categorical inputs + input_embedding_flags={}, # this would be required for non-static categorical inputs dropout=self.dropout, context_size=self.hidden_size, prescalers=self.prescalers_linear, @@ -280,9 +304,22 @@ def static_variables(self) -> List[str]: """ List of all static variables in model """ - # TODO: (Darts: dbader) we might want to include static variables in the future? return self.variables_meta["model_config"]["static_input"] + @property + def numeric_static_variables(self) -> List[str]: + """ + List of numeric static variables in model + """ + return self.variables_meta["model_config"]["static_input_numeric"] + + @property + def categorical_static_variables(self) -> List[str]: + """ + List of categorical static variables in model + """ + return self.variables_meta["model_config"]["static_input_categorical"] + @property def encoder_variables(self) -> List[str]: """ @@ -442,9 +479,25 @@ def forward( # Embedding and variable selection if self.static_variables: - static_embedding = { - name: x_static[:, :, i] for i, name in enumerate(self.static_variables) - } + # categorical static covariate embeddings + static_embedding = self.input_embeddings( + torch.cat( + [ + x_static[:, :, idx] + for idx, name in enumerate(self.static_variables) + if name in self.categorical_static_variables + ], + dim=1, + ).int() + ) + # add numerical static covariates + static_embedding.update( + { + name: x_static[:, :, idx] + for idx, name in enumerate(self.static_variables) + if name in self.numeric_static_variables + } + ) static_embedding, static_covariate_var = self.static_covariates_vsn( 
static_embedding ) @@ -570,6 +623,7 @@ def __init__( feed_forward: str = "GatedResidualNetwork", dropout: float = 0.1, hidden_continuous_size: int = 8, + categorical_embedding_sizes: Optional[Dict[str, Tuple[int, int]]] = None, add_relative_index: bool = False, loss_fn: Optional[nn.Module] = None, likelihood: Optional[Likelihood] = None, @@ -609,7 +663,7 @@ def __init__( full_attention : bool If ``True``, applies multi-head attention query on past (encoder) and future (decoder) parts. Otherwise, only queries on future part. Defaults to ``False``. - feed_forward: str + feed_forward : str A feedforward network is a fully-connected layer with an activation. TFT Can be one of the glu variant's FeedForward Network (FFN)[2]. The glu variant's FeedForward Network are a series of FFNs designed to work better with Transformer based models. Defaults to ``"GatedResidualNetwork"``. @@ -622,6 +676,9 @@ def __init__( prediction time). hidden_continuous_size : int Default for hidden size for processing continuous variables + categorical_embedding_sizes : Optional[Dict[str, Tuple[int, int]]] + Embedding sizes for categorical static covariates, keyed by column name as ``(n_categories, embedding_size)``. Only effective if the target series contains + categorical (non-numeric) static covariates. add_relative_index : bool Whether to add positional values to future covariates. Defaults to ``False``. 
This allows to use the TFTModel without having to pass future_covariates to :fun:`fit()` and @@ -783,6 +840,11 @@ def __init__( self.feed_forward = feed_forward self.dropout = dropout self.hidden_continuous_size = hidden_continuous_size + self.categorical_embedding_sizes = ( + categorical_embedding_sizes + if categorical_embedding_sizes is not None + else {} + ) self.add_relative_index = add_relative_index self.output_dim: Optional[Tuple[int, int]] = None @@ -880,25 +942,60 @@ def _create_model(self, train_sample: MixedCovariatesTrainTensorType) -> nn.Modu } reals_input = [] + categorical_input = [] time_varying_encoder_input = [] time_varying_decoder_input = [] static_input = [] + static_input_numeric = [] + static_input_categorical = [] + categorical_embedding_sizes = {} for input_var in type_names: if input_var in variables_meta["input"]: vars_meta = variables_meta["input"][input_var] - reals_input += vars_meta if input_var in [ "past_target", "past_covariate", "historic_future_covariate", ]: time_varying_encoder_input += vars_meta + reals_input += vars_meta elif input_var in ["future_covariate"]: time_varying_decoder_input += vars_meta + reals_input += vars_meta elif input_var in ["static_covariate"]: - static_input += vars_meta + static_covs = self.training_series.static_covariates + static_covs_is_real = static_covs.columns.isin( + static_covs.select_dtypes(include=np.number) + ) + cat_cols = static_covs.columns[~static_covs_is_real] + missing_embeddings = [ + col + for col in cat_cols + if col not in self.categorical_embedding_sizes + ] + raise_if( + len(missing_embeddings) > 0, + f"Missing embedding sizes for categorical static covarites: {missing_embeddings}", + logger, + ) + for idx, (static_var, col_name, is_real) in enumerate( + zip(vars_meta, static_covs.columns, static_covs_is_real) + ): + static_input.append(static_var) + if is_real: + static_input_numeric.append(static_var) + reals_input.append(static_var) + else: + 
static_input_categorical.append(static_var) + categorical_input.append(static_var) + categorical_embedding_sizes[ + vars_meta[idx] + ] = self.categorical_embedding_sizes[col_name] variables_meta["model_config"]["reals_input"] = list(dict.fromkeys(reals_input)) + variables_meta["model_config"]["categorical_input"] = list( + dict.fromkeys(categorical_input) + ) variables_meta["model_config"]["time_varying_encoder_input"] = list( dict.fromkeys(time_varying_encoder_input) ) @@ -908,10 +1005,19 @@ def _create_model(self, train_sample: MixedCovariatesTrainTensorType) -> nn.Modu variables_meta["model_config"]["static_input"] = list( dict.fromkeys(static_input) ) + variables_meta["model_config"]["static_input_numeric"] = list( + dict.fromkeys(static_input_numeric) + ) + variables_meta["model_config"]["static_input_categorical"] = list( + dict.fromkeys(static_input_categorical) + ) n_static_components = ( len(static_covariates) if static_covariates is not None else 0 ) + + self.categorical_embedding_sizes = categorical_embedding_sizes + return _TFTModule( output_dim=self.output_dim, variables_meta=variables_meta, @@ -923,6 +1029,7 @@ def _create_model(self, train_sample: MixedCovariatesTrainTensorType) -> nn.Modu full_attention=self.full_attention, feed_forward=self.feed_forward, hidden_continuous_size=self.hidden_continuous_size, + categorical_embedding_sizes=self.categorical_embedding_sizes, add_relative_index=self.add_relative_index, **self.pl_module_params, ) diff --git a/darts/models/forecasting/tft_submodels.py b/darts/models/forecasting/tft_submodels.py index 167c7007d0..731f9e11bb 100644 --- a/darts/models/forecasting/tft_submodels.py +++ b/darts/models/forecasting/tft_submodels.py @@ -34,6 +34,21 @@ HiddenState = Union[Tuple[torch.Tensor, torch.Tensor], torch.Tensor] +def get_embedding_size(n: int, max_size: int = 100) -> int: + """ + Determine empirically good embedding sizes (formula taken from fastai). 
+ Args: + n (int): number of classes + max_size (int, optional): maximum embedding size. Defaults to 100. + Returns: + int: embedding size + """ + if n > 2: + return min(round(1.6 * n**0.56), max_size) + else: + return 1 + + class _TimeDistributedEmbeddingBag(nn.EmbeddingBag): def __init__(self, *args, batch_first: bool = False, **kwargs): super().__init__(*args, **kwargs) @@ -65,78 +80,54 @@ class _MultiEmbedding(nn.Module): def __init__( self, embedding_sizes: Dict[str, Tuple[int, int]], - categorical_groups: Dict[str, List[str]], - embedding_paddings: List[str], - x_categoricals: List[str], - max_embedding_size: int = None, + variable_names: List[str], ): + """Embedding layer for categorical variables including groups of categorical variables. + Enabled for static and dynamic categories (i.e. 3 dimensions for batch x time x categories). + + Parameters + ---------- + embedding_sizes + dictionary of embedding sizes, e.g. ``{'cat1': (10, 3)}`` + indicates that the first categorical variable has 10 unique values which are mapped to 3 embedding + dimensions. Use :py:func:`~pytorch_forecasting.utils.get_embedding_size` to automatically obtain + reasonable embedding sizes depending on the number of categories. + variable_names + list of categorical variable names to ensure ordered iterations. 
+ """ super().__init__() self.embedding_sizes = embedding_sizes - self.categorical_groups = categorical_groups - self.embedding_paddings = embedding_paddings - self.max_embedding_size = max_embedding_size - self.x_categoricals = x_categoricals - - self.init_embeddings() - - def init_embeddings(self): - self.embeddings = nn.ModuleDict() - for name in self.embedding_sizes.keys(): - embedding_size = self.embedding_sizes[name][1] - if self.max_embedding_size is not None: - embedding_size = min(embedding_size, self.max_embedding_size) - # convert to list to become mutable - self.embedding_sizes[name] = list(self.embedding_sizes[name]) - self.embedding_sizes[name][1] = embedding_size - if name in self.categorical_groups: # embedding bag if related embeddings - self.embeddings[name] = _TimeDistributedEmbeddingBag( - self.embedding_sizes[name][0], - embedding_size, - mode="sum", - batch_first=True, - ) - else: - if name in self.embedding_paddings: - padding_idx = 0 - else: - padding_idx = None - self.embeddings[name] = nn.Embedding( - self.embedding_sizes[name][0], - embedding_size, - padding_idx=padding_idx, - ) - - def names(self): - return list(self.keys()) - - def items(self): - return self.embeddings.items() + self.variable_names = variable_names - def keys(self): - return self.embeddings.keys() + self.embeddings = nn.ModuleDict( + {name: nn.Embedding(*embedding_sizes[name]) for name in variable_names} + ) - def values(self): - return self.embeddings.values() + @property + def input_size(self) -> int: + return len(self.variable_names) - def __getitem__(self, name: str): - return self.embeddings[name] + @property + def output_size(self) -> Union[Dict[str, int], int]: + return {name: sizes[1] for name, sizes in self.embedding_sizes.items()} - def forward(self, x): - input_vectors = {} - for name, emb in self.embeddings.items(): - if name in self.categorical_groups: - input_vectors[name] = emb( - x[ - ..., - [ - self.x_categoricals.index(cat_name) - for cat_name in 
self.categorical_groups[name] - ], - ] - ) - else: - input_vectors[name] = emb(x[..., self.x_categoricals.index(name)]) - return input_vectors + def forward(self, x: torch.Tensor) -> Dict[str, torch.Tensor]: + """ + Parameters + ---------- + x + input tensor of shape batch x (optional) time x categoricals in the order of ``variable_names``. + + Returns + ------- + dict + dictionary of category names to embeddings of shape batch x (optional) time x embedding_size if + ``embedding_size`` is given as dictionary. + """ + return { + name: self.embeddings[name](x[..., i]) + for i, name in enumerate(self.variable_names) + } class _TimeDistributedInterpolation(nn.Module): From 624feeee21c896632a0a60d2fe7f623b37baad6f Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 26 Jun 2022 20:45:39 +0200 Subject: [PATCH 02/16] from_group_dataframe fix --- darts/models/forecasting/forecasting_model.py | 6 ++++++ darts/models/forecasting/tft_model.py | 2 +- darts/timeseries.py | 4 +++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/darts/models/forecasting/forecasting_model.py b/darts/models/forecasting/forecasting_model.py index 82e33f4499..6b0732f292 100644 --- a/darts/models/forecasting/forecasting_model.py +++ b/darts/models/forecasting/forecasting_model.py @@ -104,6 +104,10 @@ def __init__(self, *args, **kwargs): # This is only used if the model has been fit on one time series. self.training_series: Optional[TimeSeries] = None + # Static covariates sample from the (first) target series used for training the model through the `fit()` + # function. 
+ self.static_covariates: Optional[pd.DataFrame] = None + # state; whether the model has been fit (on a single time series) self._fit_called = False @@ -959,11 +963,13 @@ def fit( if isinstance(series, TimeSeries): # if only one series is provided, save it for prediction time (including covariates, if available) self.training_series = series + self.static_covariates = series.static_covariates if past_covariates is not None: self.past_covariate_series = past_covariates if future_covariates is not None: self.future_covariate_series = future_covariates else: + self.static_covariates = series[0].static_covariates if past_covariates is not None: self._expect_past_covariates = True if future_covariates is not None: diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py index 246e12d54c..1c572507f8 100644 --- a/darts/models/forecasting/tft_model.py +++ b/darts/models/forecasting/tft_model.py @@ -963,7 +963,7 @@ def _create_model(self, train_sample: MixedCovariatesTrainTensorType) -> nn.Modu time_varying_decoder_input += vars_meta reals_input += vars_meta elif input_var in ["static_covariate"]: - static_covs = self.training_series.static_covariates + static_covs = self.static_covariates static_covs_is_real = static_covs.columns.isin( static_covs.select_dtypes(include=np.number) ) diff --git a/darts/timeseries.py b/darts/timeseries.py index f8c244d017..1a6708495c 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -765,7 +765,9 @@ def from_group_dataframe( # store static covariate Series and group DataFrame (without static cov columns) splits.append( ( - pd.DataFrame([static_cov_vals], columns=static_cov_cols), + pd.DataFrame([static_cov_vals], columns=static_cov_cols).astype( + {col: df[col].dtype for col in static_cov_cols} + ), group.drop(columns=static_cov_cols), ) ) From 34221b776b9fb1013d7043e38fed4205d6a7a6f2 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Fri, 8 Jul 2022 19:04:14 +0200 Subject: [PATCH 03/16] added 
static covariate transformer --- darts/dataprocessing/transformers/__init__.py | 1 + .../static_covariates_transformer.py | 249 ++++++++++++++++++ .../test_static_covariates_transformer.py | 98 +++++++ 3 files changed, 348 insertions(+) create mode 100644 darts/dataprocessing/transformers/static_covariates_transformer.py create mode 100644 darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py diff --git a/darts/dataprocessing/transformers/__init__.py b/darts/dataprocessing/transformers/__init__.py index d7f64af4fd..c59667a17b 100644 --- a/darts/dataprocessing/transformers/__init__.py +++ b/darts/dataprocessing/transformers/__init__.py @@ -15,3 +15,4 @@ TopDownReconciliator, ) from .scaler import Scaler +from .static_covariates_transformer import StaticCovariatesTransformer diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py new file mode 100644 index 0000000000..a96444ab3c --- /dev/null +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -0,0 +1,249 @@ +""" +Static Covariates Transformer +------ +""" + +from typing import Any, Iterator, List, Optional, Sequence, Tuple, Union + +import numpy as np +import pandas as pd +from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder + +from darts.logging import get_logger, raise_log +from darts.timeseries import TimeSeries + +from .fittable_data_transformer import FittableDataTransformer +from .invertible_data_transformer import InvertibleDataTransformer +from .scaler import Scaler + +logger = get_logger(__name__) + + +class StaticCovariatesTransformer(InvertibleDataTransformer, FittableDataTransformer): + def __init__( + self, + scaler_numerical=None, + scaler_categorical=None, + name="StaticCovariatesTransformer", + n_jobs: int = 1, + verbose: bool = False, + ): + """Generic wrapper class for scalers/encoders/transformers of static covariates. 
+ + The underlying `scaler_numerical` and `scaler_categorical` have to implement the ``fit()``, ``transform()`` + and ``inverse_transform()`` methods (typically from scikit-learn). + + `scaler_numerical` addresses numerical static covariate data of the underlying series. + `scaler_categorical` addresses categorical static covariate data. + + Parameters + ---------- + scaler_numerical + The scaler to transform numeric static covariate data with. It must provide ``fit()``, + ``transform()`` and ``inverse_transform()`` methods. + Default: :class:`sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this will scale all + the numeric static covariate values between 0 and 1. + scaler_categorical + The scaler to transform categorical static covariate data with. It must provide ``fit()``, + ``transform()`` and ``inverse_transform()`` methods. + Default: :class:`sklearn.preprocessing.OrdinalEncoder()`; this will convert categories + into integer valued arrays where each integer stands for a specific category. + name + A specific name for the scaler + n_jobs + The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is + passed as input to a method, parallelising operations regarding different ``TimeSeries``. Defaults to `1` + (sequential). Setting the parameter to `-1` means using all the available processors. + Note: for a small amount of data, the parallelisation overhead could end up increasing the total + required amount of time. 
+ verbose + Optionally, whether to print operations progress + + Examples + -------- + >>> from darts.datasets import AirPassengersDataset + >>> from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder + >>> from darts.dataprocessing.transformers import StaticCovariatesTransformer + >>> series = AirPassengersDataset().load() + >>> scaler_num = MinMaxScaler(feature_range=(-1, 1)) + >>> scaler_cat = OrdinalEncoder() + >>> transformer = StaticCovariatesTransformer(scaler_numerical=scaler_num, scaler_categorical=scaler_cat) + >>> series_transformed = transformer.fit_transform(series) + >>> print(series.static_covariates_values()) + [-1.] + >>> print(series_transformed.static_covariates_values()) + [2.] + """ + super().__init__(name=name, n_jobs=n_jobs, verbose=verbose) + + self.transformer_cont = Scaler( + scaler=scaler_numerical, name=name, n_jobs=n_jobs, verbose=verbose + ) + + if scaler_numerical is None: + self.scaler_numerical = MinMaxScaler(feature_range=(0, 1)) + if scaler_categorical is None: + self.scaler_categorical = OrdinalEncoder() + + for scaler, scaler_name in zip( + [self.scaler_numerical, self.scaler_categorical], + ["scaler_numerical", "scaler_categorical"], + ): + if ( + not callable(getattr(scaler, "fit", None)) + or not callable(getattr(scaler, "transform", None)) + or not callable(getattr(scaler, "inverse_transform", None)) + ): + raise_log( + ValueError( + f"The provided `{scaler_name}` object must have fit(), transform() and " + f"inverse_transform() methods" + ), + logger, + ) + + self._numeric_col_mask = None + + def fit( + self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs + ) -> "FittableDataTransformer": + + self._fit_called = True + + if isinstance(series, TimeSeries): + data = series.static_covariates + else: + data = pd.concat([s.static_covariates for s in series], axis=0) + + self._numeric_col_mask = data.columns.isin( + data.select_dtypes(include=np.number).columns + ) + data = data.to_numpy(copy=False) + 
self.scaler_numerical.fit(data[:, self._numeric_col_mask]) + self.scaler_categorical.fit(data[:, ~self._numeric_col_mask]) + return self + + def transform( + self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs + ) -> Union[TimeSeries, List[TimeSeries]]: + kwargs = {key: val for key, val in kwargs.items()} + kwargs["component_mask"] = self._numeric_col_mask + return super().transform(series, *args, **kwargs) + + def inverse_transform( + self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs + ) -> Union[TimeSeries, List[TimeSeries]]: + kwargs = {key: val for key, val in kwargs.items()} + kwargs["component_mask"] = self._numeric_col_mask + return super().inverse_transform(series, *args, **kwargs) + + @staticmethod + def ts_fit(series: TimeSeries): + raise NotImplementedError( + "StaticCovariatesTransformer does not use method `ts_fit()`" + ) + + @staticmethod + def ts_transform( + series: TimeSeries, transformer_cont, transformer_cat, **kwargs + ) -> TimeSeries: + component_mask = kwargs.get("component_mask") + assert component_mask is not None + vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( + series, component_mask=component_mask + ) + tr_out_cont = transformer_cont.transform(vals_cont) + tr_out_cat = transformer_cat.transform(vals_cat) + + transformed_vals = StaticCovariatesTransformer._reshape_out( + series, (tr_out_cont, tr_out_cat), component_mask=component_mask + ) + + return series.with_static_covariates( + pd.DataFrame( + transformed_vals, + columns=series.static_covariates.columns, + index=series.static_covariates.index, + ) + ) + + @staticmethod + def ts_inverse_transform( + series: TimeSeries, transformer_cont, transformer_cat, **kwargs + ) -> TimeSeries: + component_mask = kwargs.get("component_mask") + assert component_mask is not None + vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( + series, component_mask=component_mask + ) + tr_out_cont = transformer_cont.inverse_transform(vals_cont) 
+ tr_out_cat = transformer_cat.inverse_transform(vals_cat) + + transformed_vals = StaticCovariatesTransformer._reshape_out( + series, (tr_out_cont, tr_out_cat), component_mask=component_mask + ) + + return series.with_static_covariates( + pd.DataFrame( + transformed_vals, + columns=series.static_covariates.columns, + index=series.static_covariates.index, + ) + ) + + def _transform_iterator( + self, series: Sequence[TimeSeries] + ) -> Iterator[Tuple[TimeSeries, Any, Any]]: + # since '_ts_fit()' returns the scaler objects, the 'fit()' call will save transformers instances into + # self.scaler_numerical and self.scaler_categorical + return zip( + series, + [self.scaler_numerical] * len(series), + [self.scaler_categorical] * len(series), + ) + + def _inverse_transform_iterator( + self, series: Sequence[TimeSeries] + ) -> Iterator[Tuple[TimeSeries, Any, Any]]: + # the same self.scaler_numerical and self.scaler_categorical will be used also for the 'ts_inverse_transform()' + return zip( + series, + [self.scaler_numerical] * len(series), + [self.scaler_categorical] * len(series), + ) + + @staticmethod + def _reshape_in( + series: TimeSeries, component_mask: Optional[np.ndarray] = None + ) -> Tuple[np.array, np.array]: + assert component_mask is not None + + # component mask points at continuous variables + vals = series.static_covariates_values(copy=False) + + # returns tuple of (continuous static covariates, categorical static covariates) + return vals[:, component_mask], vals[:, ~component_mask] + + @staticmethod + def _reshape_out( + series: TimeSeries, + vals: Tuple[np.ndarray, np.ndarray], + component_mask: Optional[np.ndarray] = None, + ) -> pd.DataFrame: + assert component_mask is not None + + vals_cont, vals_cat = vals + data = {} + idx_cont, idx_cat = 0, 0 + for col, is_numeric in zip(series.static_covariates.columns, component_mask): + if is_numeric: + data[col] = vals_cont[:, idx_cont] + idx_cont += 1 + else: + data[col] = vals_cat[:, idx_cat] + idx_cat += 1 + 
return pd.DataFrame( + data, + columns=series.static_covariates.columns, + index=series.static_covariates.index, + ) diff --git a/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py b/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py new file mode 100644 index 0000000000..593e14c428 --- /dev/null +++ b/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py @@ -0,0 +1,98 @@ +import numpy as np +import pandas as pd + +from darts import TimeSeries +from darts.dataprocessing.transformers import StaticCovariatesTransformer +from darts.tests.base_test_class import DartsBaseTestClass +from darts.utils import timeseries_generation as tg + + +class DataTransformerTestCase(DartsBaseTestClass): + series = tg.linear_timeseries(length=10) + static_covs1 = pd.DataFrame( + data={ + "cont1": [0, 1, 2], + "cat1": [1, 2, 3], + "cont2": [0.1, 0.2, 0.3], + "cat2": ["a", "b", "c"], + } + ) + static_covs1["cat1"] = static_covs1["cat1"].astype("O") + series1 = TimeSeries.from_times_and_values( + times=series.time_index, + values=np.concatenate([series.values()] * 3, axis=1), + columns=["comp1", "comp2", "comp3"], + static_covariates=static_covs1, + ) + + static_covs2 = pd.DataFrame( + data={ + "cont1": [2, 3, 4], + "cat1": [3, 4, 5], + "cont2": [0.3, 0.4, 0.5], + "cat2": ["c", "d", "e"], + } + ) + static_covs2["cat1"] = static_covs2["cat1"].astype("O") + series2 = TimeSeries.from_times_and_values( + times=series.time_index, + values=np.concatenate([series.values()] * 3, axis=1), + columns=["comp1", "comp2", "comp3"], + static_covariates=static_covs2, + ) + + def test_scaling_single_series(self): + # 3 categories for each categorical static covariate column (column idx 1 and 3) + for series in [self.series1, self.series2]: + scaler = StaticCovariatesTransformer() + series_tr = scaler.fit_transform(series) + + np.testing.assert_almost_equal( + series_tr.static_covariates_values(), + np.array( + [[0.0, 0.0, 0.0, 
0.0], [0.5, 1.0, 0.5, 1.0], [1.0, 2.0, 1.0, 2.0]] + ), + ) + series_recovered = scaler.inverse_transform(series_tr) + self.assertTrue( + series.static_covariates.equals(series_recovered.static_covariates) + ) + + def test_scaling_multi_series(self): + # 5 categories in total for each categorical static covariate from multiple time series + scaler2 = StaticCovariatesTransformer() + series_tr2 = scaler2.fit_transform([self.series1, self.series2]) + + np.testing.assert_almost_equal( + series_tr2[0].static_covariates_values(), + np.array( + [[0.0, 0.0, 0.0, 0.0], [0.25, 1.0, 0.25, 1.0], [0.5, 2.0, 0.5, 2.0]] + ), + ) + series_recovered2 = scaler2.inverse_transform(series_tr2[0]) + self.assertTrue( + self.series1.static_covariates.equals(series_recovered2.static_covariates) + ) + + np.testing.assert_almost_equal( + series_tr2[1].static_covariates_values(), + np.array( + [[0.5, 2.0, 0.5, 2.0], [0.75, 3.0, 0.75, 3.0], [1.0, 4.0, 1.0, 4.0]] + ), + ) + series_recovered3 = scaler2.inverse_transform(series_tr2[1]) + self.assertTrue( + self.series2.static_covariates.equals(series_recovered3.static_covariates) + ) + + series_recovered_multi = scaler2.inverse_transform(series_tr2) + self.assertTrue( + self.series1.static_covariates.equals( + series_recovered_multi[0].static_covariates + ) + ) + self.assertTrue( + self.series2.static_covariates.equals( + series_recovered_multi[1].static_covariates + ) + ) From 6aa1635f49df805d3a7ae75254b1bee937cf46c2 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sat, 9 Jul 2022 12:21:33 +0200 Subject: [PATCH 04/16] OneHotEncoder support for StaticCovariatesTransformer --- .../static_covariates_transformer.py | 123 +++++++++++++----- .../test_static_covariates_transformer.py | 81 +++++++++--- 2 files changed, 152 insertions(+), 52 deletions(-) diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index a96444ab3c..ed5ca38a6f 100644 --- 
a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -2,19 +2,19 @@ Static Covariates Transformer ------ """ - -from typing import Any, Iterator, List, Optional, Sequence, Tuple, Union +from collections import OrderedDict +from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union import numpy as np import pandas as pd -from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder +from scipy.sparse import csr_matrix +from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, OrdinalEncoder from darts.logging import get_logger, raise_log from darts.timeseries import TimeSeries from .fittable_data_transformer import FittableDataTransformer from .invertible_data_transformer import InvertibleDataTransformer -from .scaler import Scaler logger = get_logger(__name__) @@ -75,15 +75,12 @@ def __init__( [2.] """ super().__init__(name=name, n_jobs=n_jobs, verbose=verbose) - - self.transformer_cont = Scaler( - scaler=scaler_numerical, name=name, n_jobs=n_jobs, verbose=verbose + self.scaler_numerical = ( + MinMaxScaler() if scaler_numerical is None else scaler_numerical + ) + self.scaler_categorical = ( + OrdinalEncoder() if scaler_categorical is None else scaler_categorical ) - - if scaler_numerical is None: - self.scaler_numerical = MinMaxScaler(feature_range=(0, 1)) - if scaler_categorical is None: - self.scaler_categorical = OrdinalEncoder() for scaler, scaler_name in zip( [self.scaler_numerical, self.scaler_categorical], @@ -102,6 +99,8 @@ def __init__( logger, ) + # categoricals might need a mapping from input features to output (i.e. 
OneHotEncoding) + self._cat_feature_map = None self._numeric_col_mask = None def fit( @@ -118,9 +117,23 @@ def fit( self._numeric_col_mask = data.columns.isin( data.select_dtypes(include=np.number).columns ) + cat_cols = data.columns[~self._numeric_col_mask] + data = data.to_numpy(copy=False) self.scaler_numerical.fit(data[:, self._numeric_col_mask]) self.scaler_categorical.fit(data[:, ~self._numeric_col_mask]) + + if isinstance(self.scaler_categorical, OneHotEncoder): + self._cat_feature_map = OrderedDict( + { + col: [f"{col}_{cat}" for cat in categories] + for col, categories in zip( + cat_cols, self.scaler_categorical.categories_ + ) + } + ) + else: + self._cat_feature_map = OrderedDict({col: [col] for col in cat_cols}) return self def transform( @@ -128,13 +141,32 @@ def transform( ) -> Union[TimeSeries, List[TimeSeries]]: kwargs = {key: val for key, val in kwargs.items()} kwargs["component_mask"] = self._numeric_col_mask + kwargs["cat_feature_map"] = self._cat_feature_map return super().transform(series, *args, **kwargs) def inverse_transform( self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs ) -> Union[TimeSeries, List[TimeSeries]]: kwargs = {key: val for key, val in kwargs.items()} - kwargs["component_mask"] = self._numeric_col_mask + + component_mask = [] + cat_features = [len(vals) for vals in self._cat_feature_map.values()] + cat_idx = 0 + for col, is_numeric in zip( + series.static_covariates.columns, self._numeric_col_mask + ): + if is_numeric: + component_mask.append(True) + else: + component_mask += [False] * cat_features[cat_idx] + cat_idx += 1 + + kwargs["component_mask"] = np.array(component_mask) + kwargs["cat_feature_map"] = { + name: [col] + for col, names in self._cat_feature_map.items() + for name in names + } return super().inverse_transform(series, *args, **kwargs) @staticmethod @@ -148,48 +180,48 @@ def ts_transform( series: TimeSeries, transformer_cont, transformer_cat, **kwargs ) -> TimeSeries: component_mask = 
kwargs.get("component_mask") - assert component_mask is not None + cat_feature_map = kwargs.get("cat_feature_map") + vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( series, component_mask=component_mask ) tr_out_cont = transformer_cont.transform(vals_cont) tr_out_cat = transformer_cat.transform(vals_cat) - transformed_vals = StaticCovariatesTransformer._reshape_out( - series, (tr_out_cont, tr_out_cat), component_mask=component_mask - ) + # sparse one hot encoding to dense array + if isinstance(tr_out_cat, csr_matrix): + tr_out_cat = tr_out_cat.toarray() - return series.with_static_covariates( - pd.DataFrame( - transformed_vals, - columns=series.static_covariates.columns, - index=series.static_covariates.index, - ) + transformed_df = StaticCovariatesTransformer._reshape_out( + series, + (tr_out_cont, tr_out_cat), + component_mask=component_mask, + cat_feature_map=cat_feature_map, ) + return series.with_static_covariates(transformed_df) + @staticmethod def ts_inverse_transform( series: TimeSeries, transformer_cont, transformer_cat, **kwargs ) -> TimeSeries: component_mask = kwargs.get("component_mask") - assert component_mask is not None + cat_feature_map = kwargs.get("cat_feature_map") + vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( series, component_mask=component_mask ) tr_out_cont = transformer_cont.inverse_transform(vals_cont) tr_out_cat = transformer_cat.inverse_transform(vals_cat) - transformed_vals = StaticCovariatesTransformer._reshape_out( - series, (tr_out_cont, tr_out_cat), component_mask=component_mask + transformed_df = StaticCovariatesTransformer._reshape_out( + series, + (tr_out_cont, tr_out_cat), + component_mask=component_mask, + cat_feature_map=cat_feature_map, ) - return series.with_static_covariates( - pd.DataFrame( - transformed_vals, - columns=series.static_covariates.columns, - index=series.static_covariates.index, - ) - ) + return series.with_static_covariates(transformed_df) def _transform_iterator( self, 
series: Sequence[TimeSeries] @@ -229,21 +261,40 @@ def _reshape_out( series: TimeSeries, vals: Tuple[np.ndarray, np.ndarray], component_mask: Optional[np.ndarray] = None, + cat_feature_map: Optional[Dict[str, str]] = None, ) -> pd.DataFrame: assert component_mask is not None + assert cat_feature_map is not None vals_cont, vals_cat = vals + assert ( + len( + np.unique( + [name for names in cat_feature_map.values() for name in names] + ) + ) + == vals_cat.shape[1] + ) + data = {} idx_cont, idx_cat = 0, 0 + static_cov_columns = [] for col, is_numeric in zip(series.static_covariates.columns, component_mask): if is_numeric: data[col] = vals_cont[:, idx_cont] + static_cov_columns.append(col) idx_cont += 1 else: - data[col] = vals_cat[:, idx_cat] - idx_cat += 1 + # coverts one to one feature map (ordinal/label encoding) and one to multi feature (one hot encoding) + for col_name in cat_feature_map[col]: + if col_name not in static_cov_columns: + data[col_name] = vals_cat[:, idx_cat] + static_cov_columns.append(col_name) + idx_cat += 1 + else: + pass return pd.DataFrame( data, - columns=series.static_covariates.columns, + columns=static_cov_columns, index=series.static_covariates.index, ) diff --git a/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py b/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py index 593e14c428..045b4d438c 100644 --- a/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py +++ b/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py @@ -1,5 +1,7 @@ import numpy as np import pandas as pd +import pytest +from sklearn.preprocessing import MinMaxScaler, OneHotEncoder from darts import TimeSeries from darts.dataprocessing.transformers import StaticCovariatesTransformer @@ -7,7 +9,7 @@ from darts.utils import timeseries_generation as tg -class DataTransformerTestCase(DartsBaseTestClass): +class StaticCovariatesTransformerTestCase(DartsBaseTestClass): 
series = tg.linear_timeseries(length=10) static_covs1 = pd.DataFrame( data={ @@ -43,25 +45,62 @@ class DataTransformerTestCase(DartsBaseTestClass): def test_scaling_single_series(self): # 3 categories for each categorical static covariate column (column idx 1 and 3) + test_values = np.array( + [[0.0, 0.0, 0.0, 0.0], [0.5, 1.0, 0.5, 1.0], [1.0, 2.0, 1.0, 2.0]] + ) for series in [self.series1, self.series2]: scaler = StaticCovariatesTransformer() - series_tr = scaler.fit_transform(series) + self.helper_test_scaling(series, scaler, test_values) - np.testing.assert_almost_equal( - series_tr.static_covariates_values(), - np.array( - [[0.0, 0.0, 0.0, 0.0], [0.5, 1.0, 0.5, 1.0], [1.0, 2.0, 1.0, 2.0]] - ), - ) - series_recovered = scaler.inverse_transform(series_tr) - self.assertTrue( - series.static_covariates.equals(series_recovered.static_covariates) + test_values = np.array( + [[-1.0, 0.0, -1.0, 0.0], [0.0, 1.0, 0.0, 1.0], [1.0, 2.0, 1.0, 2.0]] + ) + for series in [self.series1, self.series2]: + scaler = StaticCovariatesTransformer( + scaler_numerical=MinMaxScaler(feature_range=(-1, 1)) ) + self.helper_test_scaling(series, scaler, test_values) + + test_values = np.array( + [ + [0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0], + [0.5, 0.0, 1.0, 0.0, 0.5, 0.0, 1.0, 0.0], + [1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0], + ] + ) + for series in [self.series1, self.series2]: + scaler = StaticCovariatesTransformer(scaler_categorical=OneHotEncoder()) + self.helper_test_scaling(series, scaler, test_values) + + def test_custom_scaler(self): + # invalid scaler with missing inverse_transform + class InvalidScaler: + def fit(self): + pass + + def transform(self): + pass + + with pytest.raises(ValueError): + _ = StaticCovariatesTransformer(scaler_numerical=InvalidScaler()) + + with pytest.raises(ValueError): + _ = StaticCovariatesTransformer(scaler_categorical=InvalidScaler()) + + class ValidScaler(InvalidScaler): + def inverse_transform(self): + pass + + _ = 
StaticCovariatesTransformer(scaler_numerical=ValidScaler()) + _ = StaticCovariatesTransformer(scaler_categorical=ValidScaler()) + _ = StaticCovariatesTransformer( + scaler_numerical=ValidScaler(), scaler_categorical=ValidScaler() + ) def test_scaling_multi_series(self): # 5 categories in total for each categorical static covariate from multiple time series - scaler2 = StaticCovariatesTransformer() - series_tr2 = scaler2.fit_transform([self.series1, self.series2]) + scaler = StaticCovariatesTransformer() + series_tr2 = scaler.fit_transform([self.series1, self.series2]) np.testing.assert_almost_equal( series_tr2[0].static_covariates_values(), @@ -69,7 +108,7 @@ def test_scaling_multi_series(self): [[0.0, 0.0, 0.0, 0.0], [0.25, 1.0, 0.25, 1.0], [0.5, 2.0, 0.5, 2.0]] ), ) - series_recovered2 = scaler2.inverse_transform(series_tr2[0]) + series_recovered2 = scaler.inverse_transform(series_tr2[0]) self.assertTrue( self.series1.static_covariates.equals(series_recovered2.static_covariates) ) @@ -80,12 +119,12 @@ def test_scaling_multi_series(self): [[0.5, 2.0, 0.5, 2.0], [0.75, 3.0, 0.75, 3.0], [1.0, 4.0, 1.0, 4.0]] ), ) - series_recovered3 = scaler2.inverse_transform(series_tr2[1]) + series_recovered3 = scaler.inverse_transform(series_tr2[1]) self.assertTrue( self.series2.static_covariates.equals(series_recovered3.static_covariates) ) - series_recovered_multi = scaler2.inverse_transform(series_tr2) + series_recovered_multi = scaler.inverse_transform(series_tr2) self.assertTrue( self.series1.static_covariates.equals( series_recovered_multi[0].static_covariates @@ -96,3 +135,13 @@ def test_scaling_multi_series(self): series_recovered_multi[1].static_covariates ) ) + + def helper_test_scaling(self, series, scaler, test_values): + series_tr = scaler.fit_transform(series) + np.testing.assert_almost_equal( + series_tr.static_covariates_values(), test_values + ) + series_recovered = scaler.inverse_transform(series_tr) + self.assertTrue( + 
series.static_covariates.equals(series_recovered.static_covariates) + ) From 95b9b84f335c4121bb5cfbb9131b137cc50d375c Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sat, 9 Jul 2022 12:29:39 +0200 Subject: [PATCH 05/16] small fix --- .../transformers/static_covariates_transformer.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index ed5ca38a6f..5ce54511cd 100644 --- a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -147,14 +147,19 @@ def transform( def inverse_transform( self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs ) -> Union[TimeSeries, List[TimeSeries]]: + kwargs = {key: val for key, val in kwargs.items()} - component_mask = [] cat_features = [len(vals) for vals in self._cat_feature_map.values()] + static_covs = ( + series.static_covariates + if isinstance(series, TimeSeries) + else series[0].static_covariates + ) + + component_mask = [] cat_idx = 0 - for col, is_numeric in zip( - series.static_covariates.columns, self._numeric_col_mask - ): + for col, is_numeric in zip(static_covs.columns, self._numeric_col_mask): if is_numeric: component_mask.append(True) else: @@ -285,7 +290,7 @@ def _reshape_out( static_cov_columns.append(col) idx_cont += 1 else: - # coverts one to one feature map (ordinal/label encoding) and one to multi feature (one hot encoding) + # covers one to one feature map (ordinal/label encoding) and one to multi feature (one hot encoding) for col_name in cat_feature_map[col]: if col_name not in static_cov_columns: data[col_name] = vals_cat[:, idx_cat] From c82272f7efe52ec1776ad7880f11cfe9854aced2 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 10 Jul 2022 14:16:32 +0200 Subject: [PATCH 06/16] TFTModel static covariate handling --- 
.../static_covariates_transformer.py | 62 ++++++++++------- darts/models/forecasting/block_rnn_model.py | 4 ++ darts/models/forecasting/nbeats.py | 4 ++ darts/models/forecasting/nhits.py | 4 ++ darts/models/forecasting/rnn_model.py | 4 ++ darts/models/forecasting/tcn_model.py | 4 ++ darts/models/forecasting/tft_model.py | 67 ++++++++++--------- .../forecasting/torch_forecasting_model.py | 27 ++++++++ darts/models/forecasting/transformer_model.py | 4 ++ 9 files changed, 123 insertions(+), 57 deletions(-) diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index 5ce54511cd..89140f9ca1 100644 --- a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -120,20 +120,21 @@ def fit( cat_cols = data.columns[~self._numeric_col_mask] data = data.to_numpy(copy=False) - self.scaler_numerical.fit(data[:, self._numeric_col_mask]) - self.scaler_categorical.fit(data[:, ~self._numeric_col_mask]) - - if isinstance(self.scaler_categorical, OneHotEncoder): - self._cat_feature_map = OrderedDict( - { - col: [f"{col}_{cat}" for cat in categories] - for col, categories in zip( - cat_cols, self.scaler_categorical.categories_ - ) - } - ) - else: - self._cat_feature_map = OrderedDict({col: [col] for col in cat_cols}) + if sum(self._numeric_col_mask): + self.scaler_numerical.fit(data[:, self._numeric_col_mask]) + if sum(~self._numeric_col_mask): + self.scaler_categorical.fit(data[:, ~self._numeric_col_mask]) + if isinstance(self.scaler_categorical, OneHotEncoder): + self._cat_feature_map = OrderedDict( + { + col: [f"{col}_{cat}" for cat in categories] + for col, categories in zip( + cat_cols, self.scaler_categorical.categories_ + ) + } + ) + else: + self._cat_feature_map = OrderedDict({col: [col] for col in cat_cols}) return self def transform( @@ -167,11 +168,13 @@ def inverse_transform( cat_idx += 1 
kwargs["component_mask"] = np.array(component_mask) - kwargs["cat_feature_map"] = { - name: [col] - for col, names in self._cat_feature_map.items() - for name in names - } + kwargs["cat_feature_map"] = OrderedDict( + { + name: [col] + for col, names in self._cat_feature_map.items() + for name in names + } + ) return super().inverse_transform(series, *args, **kwargs) @staticmethod @@ -190,12 +193,16 @@ def ts_transform( vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( series, component_mask=component_mask ) - tr_out_cont = transformer_cont.transform(vals_cont) - tr_out_cat = transformer_cat.transform(vals_cat) - # sparse one hot encoding to dense array - if isinstance(tr_out_cat, csr_matrix): - tr_out_cat = tr_out_cat.toarray() + tr_out_cont, tr_out_cat = None, None + if sum(component_mask): + tr_out_cont = transformer_cont.transform(vals_cont) + if sum(~component_mask): + tr_out_cat = transformer_cat.transform(vals_cat) + + # sparse one hot encoding to dense array + if isinstance(tr_out_cat, csr_matrix): + tr_out_cat = tr_out_cat.toarray() transformed_df = StaticCovariatesTransformer._reshape_out( series, @@ -216,8 +223,11 @@ def ts_inverse_transform( vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( series, component_mask=component_mask ) - tr_out_cont = transformer_cont.inverse_transform(vals_cont) - tr_out_cat = transformer_cat.inverse_transform(vals_cat) + tr_out_cont, tr_out_cat = None, None + if sum(component_mask): + tr_out_cont = transformer_cont.inverse_transform(vals_cont) + if sum(~component_mask): + tr_out_cat = transformer_cat.inverse_transform(vals_cat) transformed_df = StaticCovariatesTransformer._reshape_out( series, diff --git a/darts/models/forecasting/block_rnn_model.py b/darts/models/forecasting/block_rnn_model.py index 1eadf6b993..3dd8e9d606 100644 --- a/darts/models/forecasting/block_rnn_model.py +++ b/darts/models/forecasting/block_rnn_model.py @@ -325,6 +325,10 @@ def __init__( self.n_rnn_layers = n_rnn_layers 
self.dropout = dropout + @staticmethod + def _supports_static_covariates() -> bool: + return False + def _create_model(self, train_sample: Tuple[torch.Tensor]) -> torch.nn.Module: # samples are made of (past_target, past_covariates, future_target) input_dim = train_sample[0].shape[1] + ( diff --git a/darts/models/forecasting/nbeats.py b/darts/models/forecasting/nbeats.py index ace92aec02..189ec9c93b 100644 --- a/darts/models/forecasting/nbeats.py +++ b/darts/models/forecasting/nbeats.py @@ -762,6 +762,10 @@ def __init__( if isinstance(layer_widths, int): self.layer_widths = [layer_widths] * num_stacks + @staticmethod + def _supports_static_covariates() -> bool: + return False + def _create_model(self, train_sample: Tuple[torch.Tensor]) -> torch.nn.Module: # samples are made of (past_target, past_covariates, future_target) input_dim = train_sample[0].shape[1] + ( diff --git a/darts/models/forecasting/nhits.py b/darts/models/forecasting/nhits.py index a1e158a43c..7bd06b7a37 100644 --- a/darts/models/forecasting/nhits.py +++ b/darts/models/forecasting/nhits.py @@ -757,6 +757,10 @@ def _check_sizes(tup, name): return pooling_kernel_sizes, n_freq_downsample + @staticmethod + def _supports_static_covariates() -> bool: + return False + def _create_model(self, train_sample: Tuple[torch.Tensor]) -> torch.nn.Module: # samples are made of (past_target, past_covariates, future_target) input_dim = train_sample[0].shape[1] + ( diff --git a/darts/models/forecasting/rnn_model.py b/darts/models/forecasting/rnn_model.py index 0900224a01..2f709be84a 100644 --- a/darts/models/forecasting/rnn_model.py +++ b/darts/models/forecasting/rnn_model.py @@ -466,3 +466,7 @@ def _verify_train_dataset_type(self, train_dataset: TrainingDataset): train_dataset.ds_past.shift == 1, "RNNModel requires a shifted training dataset with shift=1.", ) + + @staticmethod + def _supports_static_covariates() -> bool: + return False diff --git a/darts/models/forecasting/tcn_model.py 
b/darts/models/forecasting/tcn_model.py index 834d6e7509..008805e146 100644 --- a/darts/models/forecasting/tcn_model.py +++ b/darts/models/forecasting/tcn_model.py @@ -490,3 +490,7 @@ def _build_train_dataset( shift=self.output_chunk_length, max_samples_per_ts=max_samples_per_ts, ) + + @staticmethod + def _supports_static_covariates() -> bool: + return False diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py index 1c572507f8..9bab80f543 100644 --- a/darts/models/forecasting/tft_model.py +++ b/darts/models/forecasting/tft_model.py @@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union import numpy as np +import pandas as pd import torch from torch import nn from torch.nn import LSTM as _LSTM @@ -84,9 +85,12 @@ def __init__( Defaults to `GatedResidualNetwork`. hidden_continuous_size : int default for hidden size for processing continuous variables. - categorical_embedding_sizes : int - Embedding size for categorical static covariates. Only effective if the target series contains - categorical (non-numeric) static covariates. + categorical_embedding_sizes : dict + A dictionary containing embedding sizes for categorical static covariates. The keys are the column names + of the categorical static covariates. The values are tuples of integers with + `(number of unique categories, embedding size)`. For example `{"some_column": (64, 8)}`. + Note that `TorchForecastingModels` can only handle numeric data. Consider transforming/encoding your data + with `darts.dataprocessing.transformers.static_covariates_transformer.StaticCovariatesTransformer`. dropout : float Fraction of neurons affected by Dropout. 
add_relative_index : bool @@ -480,16 +484,19 @@ def forward( # Embedding and variable selection if self.static_variables: # categorical static covariate embeddings - static_embedding = self.input_embeddings( - torch.cat( - [ - x_static[:, :, idx] - for idx, name in enumerate(self.static_variables) - if name in self.categorical_static_variables - ], - dim=1, - ).int() - ) + if self.categorical_static_variables: + static_embedding = self.input_embeddings( + torch.cat( + [ + x_static[:, :, idx] + for idx, name in enumerate(self.static_variables) + if name in self.categorical_static_variables + ], + dim=1, + ).int() + ) + else: + static_embedding = {} # add numerical static covariates static_embedding.update( { @@ -963,26 +970,20 @@ def _create_model(self, train_sample: MixedCovariatesTrainTensorType) -> nn.Modu time_varying_decoder_input += vars_meta reals_input += vars_meta elif input_var in ["static_covariate"]: - static_covs = self.static_covariates - static_covs_is_real = static_covs.columns.isin( - static_covs.select_dtypes(include=np.number) - ) - cat_cols = static_covs.columns[~static_covs_is_real] - missing_embeddings = [ - col - for col in cat_cols - if col not in self.categorical_embedding_sizes - ] - raise_if( - len(missing_embeddings) > 0, - f"Missing embedding sizes for categorical static covarites: {missing_embeddings}", - logger, - ) - for idx, (static_var, col_name, is_real) in enumerate( - zip(vars_meta, static_covs.columns, static_covs_is_real) + if ( + self.static_covariates is None + ): # when training with fit_from_dataset + static_cols = pd.Index( + [i for i in range(static_covariates.shape[1])] + ) + else: + static_cols = self.static_covariates.columns + numeric_mask = ~static_cols.isin(self.categorical_embedding_sizes) + for idx, (static_var, col_name, is_numeric) in enumerate( + zip(vars_meta, static_cols, numeric_mask) ): static_input.append(static_var) - if is_real: + if is_numeric: static_input_numeric.append(static_var) 
reals_input.append(static_var) else: @@ -1083,6 +1084,10 @@ def _build_inference_dataset( output_chunk_length=self.output_chunk_length, ) + @staticmethod + def _supports_static_covariates() -> bool: + return True + def predict(self, n, *args, **kwargs): # since we have future covariates, the inference dataset for future input must be at least of length # `output_chunk_length`. If not, we would have to step back which causes past input to be shorter than diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py index c81ee5d21a..f66a819fa7 100644 --- a/darts/models/forecasting/torch_forecasting_model.py +++ b/darts/models/forecasting/torch_forecasting_model.py @@ -26,6 +26,7 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union import numpy as np +import pandas as pd import pytorch_lightning as pl import torch from pytorch_lightning import loggers as pl_loggers @@ -607,6 +608,30 @@ def _verify_past_future_covariates(self, past_covariates, future_covariates): """ pass + @staticmethod + @abstractmethod + def _supports_static_covariates() -> bool: + """ + Whether model supports static covariates + """ + pass + + def _verify_static_covariates(self, static_covariates: Optional[pd.DataFrame]): + """ + Verify that all static covariates are numeric. + """ + if static_covariates is not None and self._supports_static_covariates(): + numeric_mask = static_covariates.columns.isin( + static_covariates.select_dtypes(include=np.number) + ) + raise_if( + sum(~numeric_mask), + "TorchForecastingModels can only interpret numeric static covariate data. 
Consider " + "encoding/transforming categorical static covariates with " + "`darts.dataprocessing.transformers.static_covariates_transformer.StaticCovariatesTransformer`.", + logger, + ) + @random_method def fit( self, @@ -694,6 +719,7 @@ def fit( self._verify_past_future_covariates( past_covariates=past_covariates, future_covariates=future_covariates ) + self._verify_static_covariates(series[0].static_covariates) def wrap_fn( ts: Union[TimeSeries, Sequence[TimeSeries]] @@ -1056,6 +1082,7 @@ def predict( else future_covariates ) + self._verify_static_covariates(series[0].static_covariates) # encoders are set when calling fit(), but not when calling fit_from_dataset() if self.encoders is not None and self.encoders.encoding_available: past_covariates, future_covariates = self.encoders.encode_inference( diff --git a/darts/models/forecasting/transformer_model.py b/darts/models/forecasting/transformer_model.py index e7e9fe9352..e33768b2e2 100644 --- a/darts/models/forecasting/transformer_model.py +++ b/darts/models/forecasting/transformer_model.py @@ -454,3 +454,7 @@ def _create_model(self, train_sample: Tuple[torch.Tensor]) -> torch.nn.Module: custom_decoder=self.custom_decoder, **self.pl_module_params, ) + + @staticmethod + def _supports_static_covariates() -> bool: + return False From da1143bc50e35b390a28301a145a3f77c36d8787 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Mon, 11 Jul 2022 17:54:00 +0200 Subject: [PATCH 07/16] improved transformer with specifying which columns to transform --- .../static_covariates_transformer.py | 219 ++++++++++-------- .../test_static_covariates_transformer.py | 74 +++++- 2 files changed, 189 insertions(+), 104 deletions(-) diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index 89140f9ca1..a620f454c9 100644 --- a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ 
b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -22,32 +22,42 @@ class StaticCovariatesTransformer(InvertibleDataTransformer, FittableDataTransformer): def __init__( self, - scaler_numerical=None, - scaler_categorical=None, + scaler_num=None, + scaler_cat=None, + cols_num: Optional[List[str]] = None, + cols_cat: Optional[List[str]] = None, name="StaticCovariatesTransformer", n_jobs: int = 1, verbose: bool = False, ): """Generic wrapper class for scalers/encoders/transformers of static covariates. - The underlying `scaler_numerical` and `scaler_categorical` have to implement the ``fit()``, ``transform()`` + The underlying `scaler_num` and `scaler_cat` have to implement the ``fit()``, ``transform()`` and ``inverse_transform()`` methods (typically from scikit-learn). - `scaler_numerical` addresses numerical static covariate data of the underlying series. - `scaler_categorical` addresses categorical static covariate data. + `scaler_num` addresses numerical static covariate data of the underlying series. + `scaler_cat` addresses categorical static covariate data. Parameters ---------- - scaler_numerical + scaler_num The scaler to transform numeric static covariate data with. It must provide ``fit()``, ``transform()`` and ``inverse_transform()`` methods. Default: :class:`sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this will scale all the values of a time series between 0 and 1. - scaler_categorical + scaler_cat The scaler to transform categorical static covariate data with. It must provide ``fit()``, ``transform()`` and ``inverse_transform()`` methods. Default: :class:`sklearn.preprocessing.OrdinalEncoder(feature_range=(0, 1))`; this will convert categories into integer valued arrays where each integer stands for a specific category. + cols_num + Optionally, a list of column names which for which to apply the numeric transformer `scaler_num`. 
+ By default, the transformer will infer all numerical features and scale them with `scaler_num`. + If an empty list, no column will be scaled. + cols_cat + Optionally, a list of column names which for which to apply the categorical transformer `scaler_cat`. + By default, the transformer will infer all categorical features and transform them with `scaler_cat`. + If an empty list, no column will be transformed. name A specific name for the scaler n_jobs @@ -64,27 +74,26 @@ def __init__( >>> from darts.datasets import AirPassengersDataset >>> from sklearn.preprocessing import MinMaxScaler, OrdicalEncoder >>> from darts.dataprocessing.transformers import StaticCovariatesTransformer - >>> series = AirPassengersDataset().load() - >>> scaler_num = MinMaxScaler(feature_range=(-1, 1)) - >>> scaler_cat = OrdinalEncoder() - >>> transformer = StaticCovariatesTransformer(scaler_numerical=scaler_num, scaler_categorical=scaler_cat) + >>> static_covs = pd.DataFrame(data={"cont": [0, 1, 2], "cat": ["a", "b", "c"]}) + >>> series = TimeSeries.from_values( + >>> values=np.random.random((10, 3)), + >>> columns=["comp1", "comp2", "comp3"], + >>> static_covariates=static_covs, + >>> ) + >>> transformer = StaticCovariatesTransformer() >>> series_transformed = transformer.fit_transform(series) - >>> print(series.static_covariates_values()) + >>> print(series.static_covariates) [-1.] - >>> print(series_transformed.static_covariates_values()) + >>> print(series_transformed.static_covariates) [2.] 
""" super().__init__(name=name, n_jobs=n_jobs, verbose=verbose) - self.scaler_numerical = ( - MinMaxScaler() if scaler_numerical is None else scaler_numerical - ) - self.scaler_categorical = ( - OrdinalEncoder() if scaler_categorical is None else scaler_categorical - ) + self.scaler_num = MinMaxScaler() if scaler_num is None else scaler_num + self.scaler_cat = OrdinalEncoder() if scaler_cat is None else scaler_cat for scaler, scaler_name in zip( - [self.scaler_numerical, self.scaler_categorical], - ["scaler_numerical", "scaler_categorical"], + [self.scaler_num, self.scaler_cat], + ["scaler_num", "scaler_cat"], ): if ( not callable(getattr(scaler, "fit", None)) @@ -99,9 +108,15 @@ def __init__( logger, ) + # numeric/categorical cols will be inferred at fitting time, if user did not set them + self.cols = None + self.cols_num = cols_num + self.cols_cat = cols_cat + self.mask_num = None + self.mask_cat = None + # categoricals might need a mapping from input features to output (i.e. OneHotEncoding) - self._cat_feature_map = None - self._numeric_col_mask = None + self.col_map_cat = None def fit( self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs @@ -113,36 +128,47 @@ def fit( data = series.static_covariates else: data = pd.concat([s.static_covariates for s in series], axis=0) + self.cols = data.columns - self._numeric_col_mask = data.columns.isin( - data.select_dtypes(include=np.number).columns - ) - cat_cols = data.columns[~self._numeric_col_mask] + # get all numeric and categorical columns + mask_num = data.columns.isin(data.select_dtypes(include=np.number).columns) + mask_cat = ~mask_num + + # infer numeric and categorical columns if user didn't supply them at transformer construction + if self.cols_num is None: + self.cols_num = data.columns[mask_num] + if self.cols_cat is None: + self.cols_cat = data.columns[mask_cat] + + self.mask_num = data.columns.isin(self.cols_num) + self.mask_cat = data.columns.isin(self.cols_cat) data = 
data.to_numpy(copy=False) - if sum(self._numeric_col_mask): - self.scaler_numerical.fit(data[:, self._numeric_col_mask]) - if sum(~self._numeric_col_mask): - self.scaler_categorical.fit(data[:, ~self._numeric_col_mask]) - if isinstance(self.scaler_categorical, OneHotEncoder): - self._cat_feature_map = OrderedDict( + if sum(self.mask_num): + self.scaler_num.fit(data[:, self.mask_num]) + if sum(self.mask_cat): + self.scaler_cat.fit(data[:, self.mask_cat]) + if isinstance(self.scaler_cat, OneHotEncoder): + self.col_map_cat = OrderedDict( { col: [f"{col}_{cat}" for cat in categories] for col, categories in zip( - cat_cols, self.scaler_categorical.categories_ + self.cols_cat, self.scaler_cat.categories_ ) } ) else: - self._cat_feature_map = OrderedDict({col: [col] for col in cat_cols}) + self.col_map_cat = OrderedDict({col: [col] for col in self.cols_cat}) + else: + self.col_map_cat = {} return self def transform( self, series: Union[TimeSeries, Sequence[TimeSeries]], *args, **kwargs ) -> Union[TimeSeries, List[TimeSeries]]: kwargs = {key: val for key, val in kwargs.items()} - kwargs["component_mask"] = self._numeric_col_mask - kwargs["cat_feature_map"] = self._cat_feature_map + kwargs["component_mask"] = (self.mask_num, self.mask_cat) + kwargs["col_map_cat"] = self.col_map_cat return super().transform(series, *args, **kwargs) def inverse_transform( @@ -151,29 +177,27 @@ def inverse_transform( kwargs = {key: val for key, val in kwargs.items()} - cat_features = [len(vals) for vals in self._cat_feature_map.values()] - static_covs = ( - series.static_covariates - if isinstance(series, TimeSeries) - else series[0].static_covariates - ) - - component_mask = [] + cat_features = [len(vals) for vals in self.col_map_cat.values()] + component_mask_num, component_mask_cat = [], [] cat_idx = 0 - for col, is_numeric in zip(static_covs.columns, self._numeric_col_mask): - if is_numeric: - component_mask.append(True) - else: - component_mask += [False] * cat_features[cat_idx] + for 
col, is_num, is_cat in zip(self.cols, self.mask_num, self.mask_cat): + if is_num: + component_mask_num.append(True) + component_mask_cat.append(False) + elif is_cat: + component_mask_num += [False] * cat_features[cat_idx] + component_mask_cat += [True] * cat_features[cat_idx] cat_idx += 1 + else: # don't scale this feature/column + component_mask_num.append(False) + component_mask_cat.append(False) - kwargs["component_mask"] = np.array(component_mask) - kwargs["cat_feature_map"] = OrderedDict( - { - name: [col] - for col, names in self._cat_feature_map.items() - for name in names - } + kwargs["component_mask"] = ( + np.array(component_mask_num), + np.array(component_mask_cat), + ) + kwargs["col_map_cat"] = OrderedDict( + {name: [col] for col, names in self.col_map_cat.items() for name in names} ) return super().inverse_transform(series, *args, **kwargs) @@ -187,17 +211,17 @@ def ts_fit(series: TimeSeries): def ts_transform( series: TimeSeries, transformer_cont, transformer_cat, **kwargs ) -> TimeSeries: - component_mask = kwargs.get("component_mask") - cat_feature_map = kwargs.get("cat_feature_map") + component_mask_num, component_mask_cat = kwargs.get("component_mask") + col_map_cat = kwargs.get("col_map_cat") vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( - series, component_mask=component_mask + series, component_mask=(component_mask_num, component_mask_cat) ) tr_out_cont, tr_out_cat = None, None - if sum(component_mask): + if sum(component_mask_num): tr_out_cont = transformer_cont.transform(vals_cont) - if sum(~component_mask): + if sum(component_mask_cat): tr_out_cat = transformer_cat.transform(vals_cat) # sparse one hot encoding to dense array @@ -207,8 +231,8 @@ def ts_transform( transformed_df = StaticCovariatesTransformer._reshape_out( series, (tr_out_cont, tr_out_cat), - component_mask=component_mask, - cat_feature_map=cat_feature_map, + component_mask=(component_mask_num, component_mask_cat), + col_map_cat=col_map_cat, ) return 
series.with_static_covariates(transformed_df) @@ -217,23 +241,23 @@ def ts_transform( def ts_inverse_transform( series: TimeSeries, transformer_cont, transformer_cat, **kwargs ) -> TimeSeries: - component_mask = kwargs.get("component_mask") - cat_feature_map = kwargs.get("cat_feature_map") + component_mask_num, component_mask_cat = kwargs.get("component_mask") + col_map_cat = kwargs.get("col_map_cat") vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( - series, component_mask=component_mask + series, component_mask=(component_mask_num, component_mask_cat) ) tr_out_cont, tr_out_cat = None, None - if sum(component_mask): + if sum(component_mask_num): tr_out_cont = transformer_cont.inverse_transform(vals_cont) - if sum(~component_mask): + if sum(component_mask_cat): tr_out_cat = transformer_cat.inverse_transform(vals_cat) transformed_df = StaticCovariatesTransformer._reshape_out( series, (tr_out_cont, tr_out_cat), - component_mask=component_mask, - cat_feature_map=cat_feature_map, + component_mask=(component_mask_num, component_mask_cat), + col_map_cat=col_map_cat, ) return series.with_static_covariates(transformed_df) @@ -242,72 +266,81 @@ def _transform_iterator( self, series: Sequence[TimeSeries] ) -> Iterator[Tuple[TimeSeries, Any, Any]]: # since '_ts_fit()' returns the scaler objects, the 'fit()' call will save transformers instances into - # self.scaler_numerical and self.scaler_categorical + # self.scaler_num and self.scaler_cat return zip( series, - [self.scaler_numerical] * len(series), - [self.scaler_categorical] * len(series), + [self.scaler_num] * len(series), + [self.scaler_cat] * len(series), ) def _inverse_transform_iterator( self, series: Sequence[TimeSeries] ) -> Iterator[Tuple[TimeSeries, Any, Any]]: - # the same self.scaler_numerical and self.scaler_categorical will be used also for the 'ts_inverse_transform()' + # the same self.scaler_num and self.scaler_cat will be used also for the 'ts_inverse_transform()' return zip( series, - 
[self.scaler_numerical] * len(series), - [self.scaler_categorical] * len(series), + [self.scaler_num] * len(series), + [self.scaler_cat] * len(series), ) @staticmethod def _reshape_in( - series: TimeSeries, component_mask: Optional[np.ndarray] = None + series: TimeSeries, + component_mask: Optional[Tuple[np.ndarray, np.ndarray]] = None, ) -> Tuple[np.array, np.array]: assert component_mask is not None + component_mask_num, component_mask_cat = component_mask # component mask points at continuous variables vals = series.static_covariates_values(copy=False) # returns tuple of (continuous static covariates, categorical static covariates) - return vals[:, component_mask], vals[:, ~component_mask] + return vals[:, component_mask_num], vals[:, component_mask_cat] @staticmethod def _reshape_out( series: TimeSeries, vals: Tuple[np.ndarray, np.ndarray], - component_mask: Optional[np.ndarray] = None, - cat_feature_map: Optional[Dict[str, str]] = None, + component_mask: Optional[Tuple[np.ndarray, np.ndarray]] = None, + col_map_cat: Optional[Dict[str, str]] = None, ) -> pd.DataFrame: assert component_mask is not None - assert cat_feature_map is not None + assert col_map_cat is not None + component_mask_num, component_mask_cat = component_mask vals_cont, vals_cat = vals - assert ( - len( - np.unique( - [name for names in cat_feature_map.values() for name in names] - ) - ) - == vals_cat.shape[1] + + n_cat_cols = len( + np.unique([name for names in col_map_cat.values() for name in names]) ) + if vals_cat is None: + assert n_cat_cols == 0 + else: + assert n_cat_cols == vals_cat.shape[1] data = {} idx_cont, idx_cat = 0, 0 static_cov_columns = [] - for col, is_numeric in zip(series.static_covariates.columns, component_mask): - if is_numeric: + for col, is_num, is_cat in zip( + series.static_covariates.columns, component_mask_num, component_mask_cat + ): + if is_num: # numeric scaled column data[col] = vals_cont[:, idx_cont] static_cov_columns.append(col) idx_cont += 1 - else: + elif 
is_cat: # categorical transformed column # covers one to one feature map (ordinal/label encoding) and one to multi feature (one hot encoding) - for col_name in cat_feature_map[col]: + for col_name in col_map_cat[col]: if col_name not in static_cov_columns: data[col_name] = vals_cat[:, idx_cat] static_cov_columns.append(col_name) idx_cat += 1 else: pass + else: # is_num and is_cat are False -> feature not part of transformer, use original values + data[col] = series.static_covariates[col] + static_cov_columns.append(col) + return pd.DataFrame( data, columns=static_cov_columns, diff --git a/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py b/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py index 045b4d438c..36a62bd7b5 100644 --- a/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py +++ b/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py @@ -18,8 +18,7 @@ class StaticCovariatesTransformerTestCase(DartsBaseTestClass): "cont2": [0.1, 0.2, 0.3], "cat2": ["a", "b", "c"], } - ) - static_covs1["cat1"] = static_covs1["cat1"].astype("O") + ).astype(dtype={"cat1": "O", "cat2": "O"}) series1 = TimeSeries.from_times_and_values( times=series.time_index, values=np.concatenate([series.values()] * 3, axis=1), @@ -57,7 +56,7 @@ def test_scaling_single_series(self): ) for series in [self.series1, self.series2]: scaler = StaticCovariatesTransformer( - scaler_numerical=MinMaxScaler(feature_range=(-1, 1)) + scaler_num=MinMaxScaler(feature_range=(-1, 1)) ) self.helper_test_scaling(series, scaler, test_values) @@ -69,9 +68,55 @@ def test_scaling_single_series(self): ] ) for series in [self.series1, self.series2]: - scaler = StaticCovariatesTransformer(scaler_categorical=OneHotEncoder()) + scaler = StaticCovariatesTransformer(scaler_cat=OneHotEncoder()) self.helper_test_scaling(series, scaler, test_values) + def test_single_type_scaler(self): + scaler_cont = 
StaticCovariatesTransformer() + series_cont = self.series1.with_static_covariates( + self.series1.static_covariates[["cont1", "cont2"]] + ) + test_cont = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]) + self.helper_test_scaling(series_cont, scaler_cont, test_cont) + + scaler_cat = StaticCovariatesTransformer() + series_cat = self.series1.with_static_covariates( + self.series1.static_covariates[["cat1", "cat2"]] + ) + test_cat = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]]) + self.helper_test_scaling(series_cat, scaler_cat, test_cat) + + def test_selected_columns(self): + test_cont = ( + pd.DataFrame( + [[0.0, 1, 0.0, "a"], [0.5, 2, 0.5, "b"], [1.0, 3, 1.0, "c"]], + ) + .astype(dtype={1: "O", 3: "O"}) + .values + ) + scaler_cont2 = StaticCovariatesTransformer( + cols_num=["cont1", "cont2"], cols_cat=[] + ) + self.helper_test_scaling(self.series1, scaler_cont2, test_cont) + + test_contcat = ( + pd.DataFrame( + [[0.0, 1, 0.0, 0.0], [1.0, 2, 0.5, 1.0], [2.0, 3, 1.0, 2.0]], + ) + .astype(dtype={1: "O"}) + .values + ) + scaler_contcat = StaticCovariatesTransformer( + cols_num=["cont2"], cols_cat=["cat2"] + ) + self.helper_test_scaling(self.series1, scaler_contcat, test_contcat) + + test_cat = pd.DataFrame( + [[0.0, 0.0, 0.1, 0.0], [1.0, 1.0, 0.2, 1], [2.0, 2.0, 0.3, 2.0]], + ).values + scaler_cat = StaticCovariatesTransformer(cols_num=[], cols_cat=["cat1", "cat2"]) + self.helper_test_scaling(self.series1, scaler_cat, test_cat) + def test_custom_scaler(self): # invalid scaler with missing inverse_transform class InvalidScaler: @@ -82,19 +127,19 @@ def transform(self): pass with pytest.raises(ValueError): - _ = StaticCovariatesTransformer(scaler_numerical=InvalidScaler()) + _ = StaticCovariatesTransformer(scaler_num=InvalidScaler()) with pytest.raises(ValueError): - _ = StaticCovariatesTransformer(scaler_categorical=InvalidScaler()) + _ = StaticCovariatesTransformer(scaler_cat=InvalidScaler()) class ValidScaler(InvalidScaler): def inverse_transform(self): pass - _ = 
StaticCovariatesTransformer(scaler_numerical=ValidScaler()) - _ = StaticCovariatesTransformer(scaler_categorical=ValidScaler()) + _ = StaticCovariatesTransformer(scaler_num=ValidScaler()) + _ = StaticCovariatesTransformer(scaler_cat=ValidScaler()) _ = StaticCovariatesTransformer( - scaler_numerical=ValidScaler(), scaler_categorical=ValidScaler() + scaler_num=ValidScaler(), scaler_cat=ValidScaler() ) def test_scaling_multi_series(self): @@ -138,9 +183,16 @@ def test_scaling_multi_series(self): def helper_test_scaling(self, series, scaler, test_values): series_tr = scaler.fit_transform(series) - np.testing.assert_almost_equal( - series_tr.static_covariates_values(), test_values + assert all( + [ + a == b + for a, b in zip( + series_tr.static_covariates_values().flatten(), + test_values.flatten(), + ) + ] ) + series_recovered = scaler.inverse_transform(series_tr) self.assertTrue( series.static_covariates.equals(series_recovered.static_covariates) From a2744382cecf4934a55edae6c60f660a3f30bfda Mon Sep 17 00:00:00 2001 From: dennisbader Date: Tue, 12 Jul 2022 10:49:13 +0200 Subject: [PATCH 08/16] docs improvements --- .../static_covariates_transformer.py | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index a620f454c9..439f1aee9a 100644 --- a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -32,11 +32,15 @@ def __init__( ): """Generic wrapper class for scalers/encoders/transformers of static covariates. - The underlying `scaler_num` and `scaler_cat` have to implement the ``fit()``, ``transform()`` + The underlying ``scaler_num`` and ``scaler_cat`` have to implement the ``fit()``, ``transform()`` and ``inverse_transform()`` methods (typically from scikit-learn). 
- `scaler_num` addresses numerical static covariate data of the underlying series. - `scaler_cat` addresses categorical static covariate data. + By default, numerical and categorical columns/features are inferred and allocated to ``scaler_num`` and + ``scaler_cat``, respectively. Alternatively, specify which columns to scale/transform with ``cols_num`` and + ``cols_cat``. + + Both ``scaler_num`` and ``scaler_cat`` are fit globally on static covariate data from all series passed + to ``StaticCovariatesTransformer.fit()`` Parameters ---------- @@ -48,7 +52,7 @@ def __init__( scaler_cat The scaler to transform categorical static covariate data with. It must provide ``fit()``, ``transform()`` and ``inverse_transform()`` methods. - Default: :class:`sklearn.preprocessing.OrdinalEncoder(feature_range=(0, 1))`; this will convert categories + Default: :class:`sklearn.preprocessing.OrdinalEncoder()`; this will convert categories into integer valued arrays where each integer stands for a specific category. cols_num Optionally, a list of column names which for which to apply the numeric transformer `scaler_num`. @@ -71,10 +75,11 @@ def __init__( Examples -------- - >>> from darts.datasets import AirPassengersDataset - >>> from sklearn.preprocessing import MinMaxScaler, OrdicalEncoder + >>> import numpy as np + >>> import pandas as pd + >>> from darts import TimeSeries >>> from darts.dataprocessing.transformers import StaticCovariatesTransformer - >>> static_covs = pd.DataFrame(data={"cont": [0, 1, 2], "cat": ["a", "b", "c"]}) + >>> static_covs = pd.DataFrame(data={"cont": [0, 2, 1], "cat": ["a", "c", "b"]}) >>> series = TimeSeries.from_values( >>> values=np.random.random((10, 3)), >>> columns=["comp1", "comp2", "comp3"], @@ -83,9 +88,17 @@ def __init__( >>> transformer = StaticCovariatesTransformer() >>> series_transformed = transformer.fit_transform(series) >>> print(series.static_covariates) - [-1.] 
+ static_covariates cont cat + component + comp1 0.0 a + comp2 2.0 c + comp3 1.0 b >>> print(series_transformed.static_covariates) - [2.] + static_covariates cont cat + component + comp1 0.0 0.0 + comp2 1.0 2.0 + comp3 0.5 1.0 """ super().__init__(name=name, n_jobs=n_jobs, verbose=verbose) self.scaler_num = MinMaxScaler() if scaler_num is None else scaler_num From aef7e09c9881f159295c953070696b4a9c523c1f Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 17 Jul 2022 10:35:46 +0200 Subject: [PATCH 09/16] docstring improvement for StaticCvoariatesTransformer --- .../static_covariates_transformer.py | 85 ++++++++++--------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index 439f1aee9a..3216c2ba2c 100644 --- a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -40,30 +40,30 @@ def __init__( ``cols_cat``. Both ``scaler_num`` and ``scaler_cat`` are fit globally on static covariate data from all series passed - to ``StaticCovariatesTransformer.fit()`` + to :class:`StaticCovariatesTransformer.fit()` Parameters ---------- scaler_num - The scaler to transform numeric static covariate data with. It must provide ``fit()``, + The scaler to transform numeric static covariate columns with. It must provide ``fit()``, ``transform()`` and ``inverse_transform()`` methods. Default: :class:`sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this will scale all - the values of a time series between 0 and 1. + values between 0 and 1. scaler_cat - The scaler to transform categorical static covariate data with. It must provide ``fit()``, + The encoder to transform categorical static covariate columns with. It must provide ``fit()``, ``transform()`` and ``inverse_transform()`` methods. 
Default: :class:`sklearn.preprocessing.OrdinalEncoder()`; this will convert categories into integer valued arrays where each integer stands for a specific category. cols_num - Optionally, a list of column names which for which to apply the numeric transformer `scaler_num`. + Optionally, a list of column names for which to apply the numeric transformer ``scaler_num``. By default, the transformer will infer all numerical features and scale them with `scaler_num`. If an empty list, no column will be scaled. cols_cat - Optionally, a list of column names which for which to apply the categorical transformer `scaler_cat`. + Optionally, a list of column names for which to apply the categorical transformer `scaler_cat`. By default, the transformer will infer all categorical features and transform them with `scaler_cat`. If an empty list, no column will be transformed. name - A specific name for the scaler + A specific name for the :class:`StaticCovariatesTransformer`. n_jobs The number of jobs to run in parallel. Parallel jobs are created only when a ``Sequence[TimeSeries]`` is passed as input to a method, parallelising operations regarding different ``TimeSeries``. 
Defaults to `1` @@ -79,7 +79,7 @@ def __init__( >>> import pandas as pd >>> from darts import TimeSeries >>> from darts.dataprocessing.transformers import StaticCovariatesTransformer - >>> static_covs = pd.DataFrame(data={"cont": [0, 2, 1], "cat": ["a", "c", "b"]}) + >>> static_covs = pd.DataFrame(data={"num": [0, 2, 1], "cat": ["a", "c", "b"]}) >>> series = TimeSeries.from_values( >>> values=np.random.random((10, 3)), >>> columns=["comp1", "comp2", "comp3"], @@ -88,13 +88,13 @@ def __init__( >>> transformer = StaticCovariatesTransformer() >>> series_transformed = transformer.fit_transform(series) >>> print(series.static_covariates) - static_covariates cont cat + static_covariates num cat component comp1 0.0 a comp2 2.0 c comp3 1.0 b >>> print(series_transformed.static_covariates) - static_covariates cont cat + static_covariates num cat component comp1 0.0 0.0 comp2 1.0 2.0 @@ -123,10 +123,8 @@ def __init__( # numeric/categorical cols will be inferred at fitting time, if user did not set them self.cols = None - self.cols_num = cols_num - self.cols_cat = cols_cat - self.mask_num = None - self.mask_cat = None + self.cols_num, self.cols_cat = cols_num, cols_cat + self.mask_num, self.mask_cat = None, None # categoricals might need a mapping from input features to output (i.e. 
OneHotEncoding) self.col_map_cat = None @@ -162,6 +160,7 @@ def fit( if sum(self.mask_cat): self.scaler_cat.fit(data[:, self.mask_cat]) if isinstance(self.scaler_cat, OneHotEncoder): + # OneHotEncoder will generate more features, create a 1-many column map for that self.col_map_cat = OrderedDict( { col: [f"{col}_{cat}" for cat in categories] @@ -171,6 +170,7 @@ def fit( } ) else: + # other encoders will have a 1-1 column map self.col_map_cat = OrderedDict({col: [col] for col in self.cols_cat}) else: self.col_map_cat = {} @@ -190,6 +190,7 @@ def inverse_transform( kwargs = {key: val for key, val in kwargs.items()} + # check how many categorical features were generated per categorical column after transforming the data cat_features = [len(vals) for vals in self.col_map_cat.values()] component_mask_num, component_mask_cat = [], [] cat_idx = 0 @@ -198,6 +199,7 @@ def inverse_transform( component_mask_num.append(True) component_mask_cat.append(False) elif is_cat: + # some categorical encoders (OneHotEncoder) generate more features and we need to keep track of that component_mask_num += [False] * cat_features[cat_idx] component_mask_cat += [True] * cat_features[cat_idx] cat_idx += 1 @@ -221,19 +223,18 @@ def ts_fit(series: TimeSeries): ) @staticmethod - def ts_transform( - series: TimeSeries, transformer_cont, transformer_cat, **kwargs - ) -> TimeSeries: + def ts_transform(series: TimeSeries, *args, **kwargs) -> TimeSeries: + transformer_num, transformer_cat = args component_mask_num, component_mask_cat = kwargs.get("component_mask") col_map_cat = kwargs.get("col_map_cat") - vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( + vals_num, vals_cat = StaticCovariatesTransformer._reshape_in( series, component_mask=(component_mask_num, component_mask_cat) ) - tr_out_cont, tr_out_cat = None, None + tr_out_num, tr_out_cat = None, None if sum(component_mask_num): - tr_out_cont = transformer_cont.transform(vals_cont) + tr_out_num = transformer_num.transform(vals_num) 
if sum(component_mask_cat): tr_out_cat = transformer_cat.transform(vals_cat) @@ -243,7 +244,7 @@ def ts_transform( transformed_df = StaticCovariatesTransformer._reshape_out( series, - (tr_out_cont, tr_out_cat), + (tr_out_num, tr_out_cat), component_mask=(component_mask_num, component_mask_cat), col_map_cat=col_map_cat, ) @@ -251,24 +252,23 @@ def ts_transform( return series.with_static_covariates(transformed_df) @staticmethod - def ts_inverse_transform( - series: TimeSeries, transformer_cont, transformer_cat, **kwargs - ) -> TimeSeries: + def ts_inverse_transform(series: TimeSeries, *args, **kwargs) -> TimeSeries: + transformer_num, transformer_cat = args component_mask_num, component_mask_cat = kwargs.get("component_mask") col_map_cat = kwargs.get("col_map_cat") - vals_cont, vals_cat = StaticCovariatesTransformer._reshape_in( + vals_num, vals_cat = StaticCovariatesTransformer._reshape_in( series, component_mask=(component_mask_num, component_mask_cat) ) - tr_out_cont, tr_out_cat = None, None + tr_out_num, tr_out_cat = None, None if sum(component_mask_num): - tr_out_cont = transformer_cont.inverse_transform(vals_cont) + tr_out_num = transformer_num.inverse_transform(vals_num) if sum(component_mask_cat): tr_out_cat = transformer_cat.inverse_transform(vals_cat) transformed_df = StaticCovariatesTransformer._reshape_out( series, - (tr_out_cont, tr_out_cat), + (tr_out_num, tr_out_cat), component_mask=(component_mask_num, component_mask_cat), col_map_cat=col_map_cat, ) @@ -278,8 +278,7 @@ def ts_inverse_transform( def _transform_iterator( self, series: Sequence[TimeSeries] ) -> Iterator[Tuple[TimeSeries, Any, Any]]: - # since '_ts_fit()' returns the scaler objects, the 'fit()' call will save transformers instances into - # self.scaler_num and self.scaler_cat + # use numerical and categorical transformers for 'ts_transform()' return zip( series, [self.scaler_num] * len(series), @@ -289,7 +288,7 @@ def _transform_iterator( def _inverse_transform_iterator( self, series: 
Sequence[TimeSeries] ) -> Iterator[Tuple[TimeSeries, Any, Any]]: - # the same self.scaler_num and self.scaler_cat will be used also for the 'ts_inverse_transform()' + # use numerical and categorical transformers for 'ts_inverse_transform()' return zip( series, [self.scaler_num] * len(series), @@ -301,13 +300,12 @@ def _reshape_in( series: TimeSeries, component_mask: Optional[Tuple[np.ndarray, np.ndarray]] = None, ) -> Tuple[np.array, np.array]: + # we expect component mask to be (numeric component mask, categorical component mask) assert component_mask is not None component_mask_num, component_mask_cat = component_mask - # component mask points at continuous variables + # returns tuple of (numeric static covariates, categorical static covariates) vals = series.static_covariates_values(copy=False) - - # returns tuple of (continuous static covariates, categorical static covariates) return vals[:, component_mask_num], vals[:, component_mask_cat] @staticmethod @@ -317,30 +315,36 @@ def _reshape_out( component_mask: Optional[Tuple[np.ndarray, np.ndarray]] = None, col_map_cat: Optional[Dict[str, str]] = None, ) -> pd.DataFrame: + # we expect component mask to be (numeric component mask, categorical component mask) assert component_mask is not None + component_mask_num, component_mask_cat = component_mask + + # `col_map_cat` contains information to which features the original categorical feature were mapped + # (i.e. 
1-1 mapping for OrdinalEncoder, or 1-many mapping for OneHotEncoder) assert col_map_cat is not None - component_mask_num, component_mask_cat = component_mask - vals_cont, vals_cat = vals + vals_num, vals_cat = vals + # infer the number of categorical output features n_cat_cols = len( np.unique([name for names in col_map_cat.values() for name in names]) ) + # quick check if everything is in order if vals_cat is None: assert n_cat_cols == 0 else: assert n_cat_cols == vals_cat.shape[1] data = {} - idx_cont, idx_cat = 0, 0 + idx_num, idx_cat = 0, 0 static_cov_columns = [] for col, is_num, is_cat in zip( series.static_covariates.columns, component_mask_num, component_mask_cat ): if is_num: # numeric scaled column - data[col] = vals_cont[:, idx_cont] + data[col] = vals_num[:, idx_num] static_cov_columns.append(col) - idx_cont += 1 + idx_num += 1 elif is_cat: # categorical transformed column # covers one to one feature map (ordinal/label encoding) and one to multi feature (one hot encoding) for col_name in col_map_cat[col]: @@ -350,10 +354,11 @@ def _reshape_out( idx_cat += 1 else: pass - else: # is_num and is_cat are False -> feature not part of transformer, use original values + else: # is_num and is_cat are False -> feature is not part of transformer, use original values data[col] = series.static_covariates[col] static_cov_columns.append(col) + # returns a pandas DataFrame of static covariates to be added to the series return pd.DataFrame( data, columns=static_cov_columns, From 91770f0eab748bd679da9885b6b09bd9c50e1b84 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 17 Jul 2022 13:21:17 +0200 Subject: [PATCH 10/16] added static covariates notebook example --- .github/workflows/merge.yml | 2 +- docs/source/examples.rst | 11 + docs/userguide/covariates.md | 70 +- examples/14-transfer-learning.ipynb | 2 +- examples/15-static-covariates.ipynb | 997 ++++++++++++++++++++++++++++ 5 files changed, 1053 insertions(+), 29 deletions(-) create mode 100644 
examples/15-static-covariates.ipynb diff --git a/.github/workflows/merge.yml b/.github/workflows/merge.yml index cbedaf58f6..86aa674b15 100644 --- a/.github/workflows/merge.yml +++ b/.github/workflows/merge.yml @@ -87,7 +87,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - example-name: [00-quickstart.ipynb, 01-multi-time-series-and-covariates.ipynb, 02-data-processing.ipynb, 03-FFT-examples.ipynb, 04-RNN-examples.ipynb, 05-TCN-examples.ipynb, 06-Transformer-examples.ipynb, 07-NBEATS-examples.ipynb, 08-DeepAR-examples.ipynb, 09-DeepTCN-examples.ipynb, 10-Kalman-filter-examples.ipynb, 11-GP-filter-examples.ipynb, 12-Dynamic-Time-Warping-example.ipynb, 13-TFT-examples.ipynb] + example-name: [00-quickstart.ipynb, 01-multi-time-series-and-covariates.ipynb, 02-data-processing.ipynb, 03-FFT-examples.ipynb, 04-RNN-examples.ipynb, 05-TCN-examples.ipynb, 06-Transformer-examples.ipynb, 07-NBEATS-examples.ipynb, 08-DeepAR-examples.ipynb, 09-DeepTCN-examples.ipynb, 10-Kalman-filter-examples.ipynb, 11-GP-filter-examples.ipynb, 12-Dynamic-Time-Warping-example.ipynb, 13-TFT-examples.ipynb, 15-static-covariates.ipynb] steps: - name: "1. Clone repository" uses: actions/checkout@v2 diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 8228854a21..8c279ae2a6 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -29,6 +29,17 @@ Data processing example notebook: examples/02-data-processing.ipynb +Static Covariates +================= + +Static covariates example notebook: + +.. toctree:: + :maxdepth: 1 + + examples/15-static-covariates.ipynb + + Transfer Learning Tutorial ========================== diff --git a/docs/userguide/covariates.md b/docs/userguide/covariates.md index db770b89a2..7b8dd5055e 100644 --- a/docs/userguide/covariates.md +++ b/docs/userguide/covariates.md @@ -1,5 +1,6 @@ # Covariates -This section was written for darts version 0.15.0 and later. 
+Sections about past and future covariates were written for darts version 0.15.0 and later. +Sections about static covariates were written for darts version 0.20.0 and later. ## Summary - TL;DR In Darts, **covariates** refer to external data that can be used as inputs to models to help improve forecasts. @@ -8,7 +9,7 @@ covariates themselves are not predicted. We distinguish three kinds of covariate * **past covariates** are (by definition) covariates known only into the past (e.g. measurements) * **future covariates** are (by definition) covariates known into the future (e.g., weather forecasts) -* **static covariates** are (by definition) covariates constant over time. They are not yet supported in Darts, but we are working on it! +* **static covariates** are (by definition) covariates constant over time (e.g., product IDs). Check out our static covariates example notebook [here](https://unit8co.github.io/darts/examples/15-static-covariates.html) for more information. Models in Darts accept `past_covariates` and/or `future_covariates` in their `fit()` and `predict()` methods, depending on their capabilities (some models accept no covariates at all). Both target and covariates must be a `TimeSeries` object. The models will raise an error if covariates were used that are not supported. ```python @@ -26,6 +27,7 @@ model.predict(n=12, past_covariates=past_covariates_pred, future_covariates=future_covariates_pred) ``` +Different to past and future covariates, static covariates must be embedded in the **target** series. Because of that, working with static covariates follows a different methodology. You can check out how to use static covariates in [this example](https://unit8co.github.io/darts/examples/15-static-covariates.html). If you have several covariate variables that you want to use as past (or future) covariates, you have to `stack()` all of them into a single `past_covariates` (or `future_covariates`) object. 
@@ -71,22 +73,35 @@ of the past covariates, and using auto-regression on the target series. If you w ## Introduction - What are covariates (in Darts)? -Covariates provide additional information/context that can be useful to improve the prediction of the `target` series. The `target` series is the variable we wish to predict the future for. We do not predict the covariates themselves, only use them for prediction of the `target`. +Past, future and static covariates provide additional information/context that can be useful to improve the prediction of the `target` series. The `target` series is the variable we wish to predict the future for. We do not predict the covariates themselves, only use them for prediction of the `target`. -Covariates can hold information about the past (upto and including present time) or future. This is always relative to the prediction point (in time) after which we want to forecast the future. -In Darts, we refer to these two types as `past_covariates` and `future_covariates`. Darts' forecasting models have different support modes for `*_covariates`. Some do not support covariates at all, others support either past or future covariates and some support both (more on that in [this subsection](#forecasting-model-covariate-support)). +Past and future covariates hold information about the past (up to and including present time) or future. This is always relative to the prediction point (in time) after which we want to forecast the future. +In Darts, we refer to these two types as `past_covariates` and `future_covariates`. -Let's have a look at some examples of past and future covariates: +Static covariates hold time independent (constant / static) information about the `target` series. We refer to them as `static_covariates`. They must be embedded in the `target` series. Working with static covariates follows a slightly different approach than with past or future covariates.
Check out our [notebook on static covariates](https://unit8co.github.io/darts/examples/15-static-covariates.html) to learn more. + +Darts' forecasting models have different support modes for `*_covariates`. Some do not support covariates at all, others support only past or future covariates and some even support all three (more on that in [this subsection](#forecasting-model-covariate-support)). + +Let's have a look at some examples of past, future, and static covariates: - `past_covariates`: typically measurements (past data) or temporal attributes - daily average **measured** temperatures (known only in the past) - day of week, month, year, ... - `future_covariates`: typically forecasts (future known data) or temporal attributes - daily average **forecasted** temperatures (known in the future) - day of week, month, year, ... +- `static_covariates`: time independent/constant/static `target` characteristics + - categorical: + - location of `target` (country, city, .. name) + - `target` identifier: (product ID, store ID, ...) + - numerical: + - population of `target`'s country/market area (assuming it stays constant over the forecasting horizon) + - average temperature of `target`'s region (assuming it stays constant over the forecasting horizon) + Temporal attributes are powerful because they are known in advance and can help models capture trends and / or seasonal patterns of the `target` series. +Static attributes are powerful when working with multiple `targets`. The time independent information can help models identify the nature/environment of the underlying series and improve forecasts across different `targets`. -Here's a simple rule-of-thumb to know if your series are past or future covariates: +In this guide we'll focus on past and future covariates. Here's a simple rule-of-thumb to know if your series are **past** or **future covariates**: *If the values are known in advance, they are future covariates (or can be used as past covariates).
If they are not, they **must** be past covariates.* @@ -96,7 +111,7 @@ Side note: if you don't have future values (e.g. of measured temperatures), noth ## Forecasting Model Covariate Support -Darts' forecasting models accept optional `past_covariates` and / or `future_covariates` in their `fit()` and `predict()` methods, depending on their capabilities. Table 1 shows the supported covariate types for each model. The models will raise an error if covariates were used that are not supported. +Darts' forecasting models accept optional `past_covariates` and / or `future_covariates` in their `fit()` and `predict()` methods (and `static_covariates` embedded in the `target` series), depending on their capabilities. Table 1 shows the supported covariate types for each model. The models will raise an error if covariates were used that are not supported. ### Local Forecasting Models (LFMs): LFMs are models that can be trained on a single target series only. In Darts most models in this category tend to be simpler statistical models (such as ETS or ARIMA). LFMs accept only a single `target` (and covariate) time series and usually train on the entire series you supplied when calling `fit()` at once. They can also predict in one go for any number of predictions `n` after the end of the training series. 
@@ -106,24 +121,24 @@ GFMs are broadly speaking "machine learning based" models, which denote PyTorch- ---- -Model | Past Covariates | Future Covariates ---- | :---: | :---: -**Local Forecasting Models (LFMs)** | | -`ExponentialSmoothing` | | -`Theta` and `FourTheta` | | -`FFT` | | -`ARIMA` | | ✅ -`VARIMA` | | ✅ -`AutoARIMA` | | ✅ -`Prophet` | | ✅ -**Global Forecasting Models (GFMs)** | | -`RegressionModel`* | ✅ | ✅ -`RNNModel`** | | ✅ -`BlockRNNModel`*** | ✅ | -`NBEATSModel` | ✅ | -`TCNModel` | ✅ | -`TransformerModel` | ✅ | -`TFTModel` | ✅ | ✅ +Model | Past Covariates | Future Covariates | Static Covariates +--- | :---: | :---: | :---: +**Local Forecasting Models (LFMs)** | | | +`ExponentialSmoothing` | | | +`Theta` and `FourTheta` | | | +`FFT` | | | +`ARIMA` | | ✅ | +`VARIMA` | | ✅ | +`AutoARIMA` | | ✅ | +`Prophet` | | ✅ | +**Global Forecasting Models (GFMs)** | | | +`RegressionModel`* | ✅ | ✅ | +`RNNModel`** | | ✅ | +`BlockRNNModel`*** | ✅ | | +`NBEATSModel` | ✅ | | +`TCNModel` | ✅ | | +`TransformerModel` | ✅ | | +`TFTModel` | ✅ | ✅ | ✅ **Table 1: Darts' forecasting models and their covariate support** @@ -138,7 +153,7 @@ and past targets to do predictions. ---- -## Quick guide on how to use covariates with Darts' forecasting models +## Quick guide on how to use past and/or future covariates with Darts' forecasting models It is very simple to use covariates with Darts' forecasting models. There are just some requirements they have to fulfill. Just like the `target` series, each of your past and / or future covariates series must be a `TimeSeries` object. When you train your model with `fit()` using past and /or future covariates, you have to supply the same types of covariates to `predict()`. Depending on the choice of your model and how long your forecast horizon `n` is, there might be different time span requirements for your covariates. 
You can find these requirements in the [next subsection](#covariate-time-span-requirements-for-local-and-global-forecasting-models). @@ -223,6 +238,7 @@ If you want to know more details about how covariates are used behind the scenes ## Examples Here are a few examples showcasing how to use covariates with Darts forecasting models: +- [Static covariates tutorial including TFTModel example](https://unit8co.github.io/darts/examples/15-static-covariates.html) - [Past covariates with GFMs](https://unit8co.github.io/darts/examples/01-multi-time-series-and-covariates.html#Covariates-Series) - [Past and future covariates with TFTModel](https://unit8co.github.io/darts/examples/13-TFT-examples.html#Training) - [Past and future covariates with RegressionModels](https://medium.com/unit8-machine-learning-publication/time-series-forecasting-using-past-and-future-external-data-with-darts-1f0539585993) diff --git a/examples/14-transfer-learning.ipynb b/examples/14-transfer-learning.ipynb index 717f8f28c0..5e223a883d 100644 --- a/examples/14-transfer-learning.ipynb +++ b/examples/14-transfer-learning.ipynb @@ -2257,7 +2257,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.12" } }, "nbformat": 4, diff --git a/examples/15-static-covariates.ipynb b/examples/15-static-covariates.ipynb new file mode 100644 index 0000000000..724fc1192d --- /dev/null +++ b/examples/15-static-covariates.ipynb @@ -0,0 +1,997 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "711bc155-8d7d-46e6-9ca0-b4b859515066", + "metadata": {}, + "outputs": [], + "source": [ + "# fix python path if working locally\n", + "from utils import fix_pythonpath_if_working_locally\n", + "\n", + "fix_pythonpath_if_working_locally()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d3da8657-2bff-43e1-a96f-db149fa9f409", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 
2\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a46ecf12-0d29-459f-bd2a-90ae4c745df6", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from darts import TimeSeries\n", + "\n", + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")\n", + "import logging\n", + "\n", + "logging.disable(logging.CRITICAL)" + ] + }, + { + "cell_type": "markdown", + "id": "8f32480c-db7d-4e9f-8ebc-bccf004d817e", + "metadata": {}, + "source": [ + "# Static Covariates\n", + "\n", + "Static covariates are characteristics of a time series / constants which do not change over time. When dealing with multiple time series, static covariates can help specific models improve forecasts. Darts' models will only consider static covariates embedded in the target series (the series for which we want to predict future values) and not past and/or future covariates (external data).\n", + "\n", + "In this tutorial we look at:\n", + "\n", + "1. how to define static covariates (numeric and/or categorical)\n", + "2. how to add static covariates to an existing **target** series\n", + "3. how to add static covariates at TimeSeries creation\n", + "4. how to use TimeSeries.from_group_dataframe() for automatic extraction of TimeSeries with embedded static covariates\n", + "5. how to scale/transform/encode static covariates embedded in your series\n", + "6. how to use static covariates with Darts' models\n", + "\n", + "We start by generating a multivariate time series with three components `[\"comp1\", \"comp2\", \"comp3\"]`" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "13e632a4-ff6a-4d58-be65-b6bebc37ca2b", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "np.random.seed(0)\n", + "series = TimeSeries.from_times_and_values(\n", + " times=pd.date_range(start=\"2020-01-01\", periods=10, freq=\"h\"),\n", + " values=np.random.random((10, 3)),\n", + " columns=[\"comp1\", \"comp2\", \"comp3\"],\n", + ")\n", + "series.plot()" + ] + }, + { + "cell_type": "markdown", + "id": "bd068f2c-bc9c-45c7-b757-a7feae6449d5", + "metadata": {}, + "source": [ + "### 1. Defining static covariates\n", + "Define your static covariates as a pd.DataFrame where the columns represent the static variables and rows stand for the components of the uni/multivariate `TimeSeries` they will be added to.\n", + "\n", + "- The number of rows must either be 1 or equal to the number of components from `series`.\n", + "- Using a single row static covariate DataFrame with a multivariate (multi component) `series`, the static covariates will be mapped globally to all components." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "90431347-dca7-4dee-b7e4-ef4911da605c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " cont cat\n", + "0 0 a\n", + " cont cat\n", + "0 0 a\n", + "1 2 c\n", + "2 1 b\n" + ] + } + ], + "source": [ + "# arbitrary continuous and categorical static covariates (single row)\n", + "static_covs_single = pd.DataFrame(data={\"cont\": [0], \"cat\": [\"a\"]})\n", + "print(static_covs_single)\n", + "\n", + "# multivariate static covariates (multiple components). note that the number of rows matches the number of components of `series`\n", + "static_covs_multi = pd.DataFrame(data={\"cont\": [0, 2, 1], \"cat\": [\"a\", \"c\", \"b\"]})\n", + "print(static_covs_multi)" + ] + }, + { + "cell_type": "markdown", + "id": "127de718-dda2-4423-a02a-b354ef43ba08", + "metadata": {}, + "source": [ + "### 2. 
Add static covariates to an existing TimeSeries\n", + "Create a new series from an existing TimeSeries with added static covariates using method `with_static_covariates()` (see docs [here](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.with_static_covariates))\n", + "\n", + "- Single row static covariates with multivariate `series` creates \"global_components\" which are mapped to all components\n", + "- Multi row static covariates with multivariate `series` will be mapped to the component names of `series` (see static covariate index/row names)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "80e40a2e-508d-4494-a066-fa70c1cd520f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Single row static covarites with multivariate `series`\n", + "static_covariates cont cat\n", + "global_components 0.0 a\n", + "\n", + "Multi row static covarites with multivariate `series`\n", + "static_covariates cont cat\n", + "component \n", + "comp1 0.0 a\n", + "comp2 2.0 c\n", + "comp3 1.0 b\n" + ] + } + ], + "source": [ + "assert series.static_covariates is None\n", + "\n", + "series_single = series.with_static_covariates(static_covs_single)\n", + "print(\"Single row static covarites with multivariate `series`\")\n", + "print(series_single.static_covariates)\n", + "\n", + "series_multi = series.with_static_covariates(static_covs_multi)\n", + "print(\"\\nMulti row static covarites with multivariate `series`\")\n", + "print(series_multi.static_covariates)" + ] + }, + { + "cell_type": "markdown", + "id": "638f47fb-282c-45d5-8708-42af483912a3", + "metadata": {}, + "source": [ + "### 3. Adding static covariates at TimeSeries construction\n", + "Static covariates can also directly be added when creating a time series with parameter `static_covariates` in most of `TimeSeries.from_*()` methods." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2106e10b-8a32-4e48-9d3c-2c77bfab4473", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "static_covariates cont cat\n", + "component \n", + "comp1 0.0 a\n", + "comp2 2.0 c\n", + "comp3 1.0 b\n" + ] + } + ], + "source": [ + "# add arbitrary continuous and categorical static covariates\n", + "series = TimeSeries.from_values(\n", + " values=np.random.random((10, 3)),\n", + " columns=[\"comp1\", \"comp2\", \"comp3\"],\n", + " static_covariates=static_covs_multi,\n", + ")\n", + "print(series.static_covariates)" + ] + }, + { + "cell_type": "markdown", + "id": "9caa4638-35d0-4397-98eb-672b18d94115", + "metadata": {}, + "source": [ + "### Using static covariates with multiple TimeSeries\n", + "Static covariates are only really useful if we use them across multiple TimeSeries. By convention, the static covariates layout (pd.DataFrame columns, index) has to be the same for all series." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "b6af377e-e0b8-4e36-9df7-d625789b95ee", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Valid static covariates for multiple series\n", + "static_covariates ID var1\n", + "global_components SERIES1 0.5\n", + "static_covariates ID var1\n", + "global_components SERIES2 0.75\n" + ] + } + ], + "source": [ + "first_series = series.with_static_covariates(\n", + " pd.DataFrame(data={\"ID\": [\"SERIES1\"], \"var1\": [0.5]})\n", + ")\n", + "second_series = series.with_static_covariates(\n", + " pd.DataFrame(data={\"ID\": [\"SERIES2\"], \"var1\": [0.75]})\n", + ")\n", + "\n", + "print(\"Valid static covariates for multiple series\")\n", + "print(first_series.static_covariates)\n", + "print(second_series.static_covariates)\n", + "\n", + "series_multi = [first_series, second_series]" + ] + }, + { + "cell_type": "markdown", + "id": "18b91481-969b-449c-9f90-3ee749a93f43", + "metadata": {}, + "source": [ + "### 4. Extract a list of time series by groups from a DataFrame using `from_group_dataframe()`\n", + "If your DataFrame contains multiple time series which are stacked vertically, you can use `TimeSeries.from_group_dataframe()` (see the docs [here](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_group_dataframe)) to extract them as a list of TimeSeries instances. This requires a column or list of columns for which the DataFrame should grouped by (parameter `group_cols`).\n", + "`group_cols` will automatically be added as static covariates to the individual series. Additional columns can be used as static covariates with parameter `static_cols`. \n", + "\n", + "In the example below, we generate a DataFrame which contains data of two distinct time series (overlapping/repeating dates) \"SERIES1\" and \"SERIES2\" and extract the TimeSeries with `from_group_dataframe()`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "643fe7fd-0442-42ef-9109-0b58b720edd4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input DataFrame\n", + " dates comp1 comp2 comp3 ID var1\n", + "0 2020-01-01 0.158970 0.820993 0.976761 SERIES1 0.50\n", + "1 2020-01-02 0.110375 0.097101 0.604846 SERIES1 0.50\n", + "2 2020-01-03 0.656330 0.837945 0.739264 SERIES1 0.50\n", + "3 2020-01-01 0.138183 0.096098 0.039188 SERIES2 0.75\n", + "4 2020-01-02 0.196582 0.976459 0.282807 SERIES2 0.75\n", + "5 2020-01-03 0.368725 0.468651 0.120197 SERIES2 0.75\n", + "\n", + "2 series were extracted from the input DataFrame\n", + "Static covariates of series 0\n", + "static_covariates ID var1\n", + "global_components SERIES1 0.5\n", + "Static covariates of series 1\n", + "static_covariates ID var1\n", + "global_components SERIES2 0.75\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# generate an DataFrame example\n", + "df = pd.DataFrame(\n", + " data={\n", + " \"dates\": [\n", + " \"2020-01-01\",\n", + " \"2020-01-02\",\n", + " \"2020-01-03\",\n", + " \"2020-01-01\",\n", + " \"2020-01-02\",\n", + " \"2020-01-03\",\n", + " ],\n", + " \"comp1\": np.random.random((6,)),\n", + " \"comp2\": np.random.random((6,)),\n", + " \"comp3\": np.random.random((6,)),\n", + " \"ID\": [\"SERIES1\", \"SERIES1\", \"SERIES1\", \"SERIES2\", \"SERIES2\", \"SERIES2\"],\n", + " \"var1\": [0.5, 0.5, 0.5, 0.75, 0.75, 0.75],\n", + " }\n", + ")\n", + "print(\"Input DataFrame\")\n", + "print(df)\n", + "\n", + "series_multi = TimeSeries.from_group_dataframe(\n", + " df,\n", + " time_col=\"dates\",\n", + " group_cols=\"ID\", # individual time series are extracted by grouping `df` by `group_cols`\n", + " static_cols=[\n", + " \"var1\"\n", + " ], # also extract these additional columns as static covariates (without grouping)\n", + " value_cols=[\n", + " \"comp1\",\n", + " \"comp2\",\n", + " \"comp3\",\n", + " ], # optionally, specify the time varying columns\n", + ")\n", + "\n", + "print(f\"\\n{len(series_multi)} series were extracted from the input DataFrame\")\n", + "for i, ts in enumerate(series_multi):\n", + " print(f\"Static covariates of series {i}\")\n", + " print(ts.static_covariates)\n", + " ts[\"comp1\"].plot(label=f\"comp1_series_{i}\")" + ] + }, + { + "cell_type": "markdown", + "id": "71c466ae-b136-4307-a9b5-550554ffc467", + "metadata": {}, + "source": [ + "### 5. Scaling/Encoding static covariate data\n", + "There might be the need to scale numeric static covariates or encode categorical static covariates as not all models can handle non numeric static covariates. 
\n", + "\n", + "Use `StaticCovariatesTransformer` (see the docs [here](https://unit8co.github.io/darts/generated_api/darts.dataprocessing.transformers.static_covariates_transformer.html#staticcovariatestransformer)) to scale/transform static covariates. By default it uses a `MinMaxScaler` to scale numeric data, and an `OrdinalEncoder` to encode categorical data.\n", + "Both the numeric scaler and categorical encoder will be fit globally on static covariate data of all time series passed to `StaticCovariatesTransformer.fit()`" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "32455f97-f147-4b02-9838-11632488b25f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original series 0\n", + "static_covariates ID var1\n", + "global_components SERIES1 0.5\n", + "Transformed series 0\n", + "static_covariates ID var1\n", + "global_components 0.0 0.0\n", + "\n", + "Original series 1\n", + "static_covariates ID var1\n", + "global_components SERIES2 0.75\n", + "Transformed series 1\n", + "static_covariates ID var1\n", + "global_components 1.0 1.0\n", + "\n" + ] + } + ], + "source": [ + "from darts.dataprocessing.transformers import StaticCovariatesTransformer\n", + "\n", + "transformer = StaticCovariatesTransformer()\n", + "series_transformed = transformer.fit_transform(series_multi)\n", + "\n", + "for i, (ts, ts_scaled) in enumerate(zip(series_multi, series_transformed)):\n", + " print(f\"Original series {i}\")\n", + " print(ts.static_covariates)\n", + " print(f\"Transformed series {i}\")\n", + " print(ts_scaled.static_covariates)\n", + " print(\"\")" + ] + }, + { + "cell_type": "markdown", + "id": "e6db96f9-4b1f-4f7f-9897-144b8e8b270d", + "metadata": {}, + "source": [ + "### 6. 
Forecasting example with `TFTModel` and static covariates\n", + "Now let's find out if adding static covariates to a forecasting problem can improve predictive accuracy.\n", + "We'll use `TFTModel` which supports numeric and categorical static covariates." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "a0e90425-1196-4a84-8f5e-b5a1597bcda5", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "from pytorch_lightning.callbacks import TQDMProgressBar\n", + "\n", + "from darts import TimeSeries\n", + "from darts.models import TFTModel\n", + "from darts.utils import timeseries_generation as tg\n", + "from darts.dataprocessing.transformers import StaticCovariatesTransformer\n", + "from darts.metrics import rmse" + ] + }, + { + "cell_type": "markdown", + "id": "58c2fd83-6a2d-4b44-a88a-a5579695011e", + "metadata": {}, + "source": [ + "#### 6.1 Experiment setup\n", + "For our experiment, we generate two time series: a fully sine wave series (label = smooth) and sine wave series with some irregularities every other period (label = irregular, see the ramps at periods 2 and 4)." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "1f8af919-e0cb-4d4c-9577-04954421e187", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "period = 20\n", + "sine_series = tg.sine_timeseries(\n", + " length=4 * period, value_frequency=1 / period, column_name=\"smooth\", freq=\"h\"\n", + ")\n", + "\n", + "sine_vals = sine_series.values()\n", + "linear_vals = np.expand_dims(np.linspace(1, -1, num=19), -1)\n", + "\n", + "sine_vals[21:40] = linear_vals\n", + "sine_vals[61:80] = linear_vals\n", + "irregular_series = TimeSeries.from_times_and_values(\n", + " values=sine_vals, times=sine_series.time_index, columns=[\"irregular\"]\n", + ")\n", + "sine_series.plot()\n", + "irregular_series.plot()" + ] + }, + { + "cell_type": "markdown", + "id": "783504a3-65f3-40bc-8a1f-ed5a090b98e7", + "metadata": {}, + "source": [ + "We will use three different setups for training and evaluation:\n", + "\n", + "1. fit/predict without static covariates\n", + "2. fit/predict with binary (numeric) static covariates\n", + "3. fit/predict with categorical static covariates\n", + "\n", + "For each setup we'll train the model on both series and then use only the 3rd period (sine wave for both series) to predict the 4th period (sine for \"smooth\" and ramp for \"irregular\").\n", + "\n", + "What we hope for is that the model without static covariates performs worse than the other ones. The non-static model should not be able to recognize whether the underlying series used in `predict()` is the *smooth* or the *irregular* series as it only gets a sine wave curve as input (3rd period). This should result in a forecast somewhere in between the smooth and irregular series (learned by minimizing the global loss during training).\n", + "\n", + "Now this is where static covariates can really help. For example, we can embed data about the curve type into the **target** series through static covariates. 
With this information, we'd expect the models to generate improved forecasts.\n", + "\n", + "First we create some helper functions to apply the same experiment conditions to all models." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "76279b10-1c94-46c7-9b91-6b59398a6c8b", + "metadata": {}, + "outputs": [], + "source": [ + "def test_case(model, train_series, predict_series):\n", + " \"\"\"helper function which performs model training, prediction and plotting\"\"\"\n", + " model.fit(train_series)\n", + " preds = model.predict(\n", + " n=int(period / 2), num_samples=250, series=predict_series, verbose=False\n", + " )\n", + " for ts, ps in zip(train_series, preds):\n", + " ts.plot()\n", + " ps.plot()\n", + " plt.show()\n", + " return preds\n", + "\n", + "\n", + "def get_model_params():\n", + " \"\"\"helper function that generates model parameters with a new Progress Bar object\"\"\"\n", + " return {\n", + " \"input_chunk_length\": int(period / 2),\n", + " \"output_chunk_length\": int(period / 2),\n", + " \"add_encoders\": {\n", + " \"datetime_attribute\": {\"future\": [\"hour\"]}\n", + " }, # TFTModel requires future input, with this we won't have to supply any future_covariates\n", + " \"random_state\": 42,\n", + " \"n_epochs\": 150,\n", + " \"pl_trainer_kwargs\": {\n", + " \"callbacks\": [TQDMProgressBar(refresh_rate=4)],\n", + " },\n", + " }" + ] + }, + { + "cell_type": "markdown", + "id": "99faf9bc-305a-4897-8f10-5f8ac3777601", + "metadata": {}, + "source": [ + "### 6.2 Forecasting without static covariates\n", + "Let's train the first model without any static covariates" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "7a8411c3-b02a-4b19-97bb-46a253f43338", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "6bdf2f0868df4253b621aa3236eee2ab", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": 
{}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d52eb2242b3c4f25bfc82fe27f631419", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Predicting: 4it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "train_series = [sine_series, irregular_series]\n", + "for series in train_series:\n", + " assert not series.has_static_covariates\n", + "\n", + "model = TFTModel(**get_model_params())\n", + "preds = test_case(\n", + " model,\n", + " train_series,\n", + " predict_series=[series[:60] for series in train_series],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d0c99b81-c62a-4712-8b53-57adfaae4b10", + "metadata": {}, + "source": [ + "From the plot you can see that the forecast began after period 3 (~01-03-2022 - 12:00). The prediction input were the last `input_chunk_length=10` values - which are identical for both series (sine wave).\n", + "\n", + "As expected, the model was not able to determine the type of the underlying prediction series (smooth or irregular) and generated a sine-wave like forecast for both." + ] + }, + { + "cell_type": "markdown", + "id": "fb22bf06-9803-40f6-9d37-25e65840a4ad", + "metadata": {}, + "source": [ + "### 6.3 Forecasting with 0/1 binary static covariates (numeric)\n", + "\n", + "Now let's repeat the experiment but this time we add information about the curve type as a binary (numeric) static covariate named `\"curve_type\"`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "44b34b11-d5de-4d6d-a817-b584b12fb12a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "static_covariates curve_type\n", + "component \n", + "smooth 1.0\n", + "static_covariates curve_type\n", + "component \n", + "irregular 0.0\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9a5bf463e99b4c0baecc2944a40a0f65", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "259476d0a70f41eca31008b1ecdaa42e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Predicting: 4it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sine_series_st_bin = sine_series.with_static_covariates(\n", + " pd.DataFrame(data={\"curve_type\": [1]})\n", + ")\n", + "irregular_series_st_bin = irregular_series.with_static_covariates(\n", + " pd.DataFrame(data={\"curve_type\": [0]})\n", + ")\n", + "\n", + "train_series = [sine_series_st_bin, irregular_series_st_bin]\n", + "for series in train_series:\n", + " print(series.static_covariates)\n", + "\n", + "model = TFTModel(**get_model_params())\n", + "preds_st_bin = test_case(\n", + " model,\n", + " train_series,\n", + " predict_series=[series[:60] for series in train_series],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "dbfe3526-d346-4eda-91c3-3e61cc9de0be", + "metadata": {}, + "source": [ + "That already looks much better! The model was able to identify the curve type/category from the binary static covariate feature." + ] + }, + { + "cell_type": "markdown", + "id": "9298d11f-3864-461d-a364-6b2e5e45d2f6", + "metadata": {}, + "source": [ + "### 6.4 Forecasting with categorical static covariates\n", + "The last experiment already showed promising results. So why not only use binary features for categorical data?\n", + "While it might have worked well for our two time series, if we had more curve types we'd need to one hot encode \n", + "the feature into a binary variable for each category. With a lot of categories, this leads to a large number of \n", + "features/predictors and multicollinearity which can lower the model's predictive accuracy.\n", + "\n", + "As a last experiment, let's use the curve type as a categorical feature. `TFTModel` learns an embedding for categorical features.\n", + "Darts' `TorchForecastingModels` (such as `TFTModel`) only support numeric data. Before training we need to transform the `\"curve_type\"` into an integer-valued feature with `StaticCovariatesTransformer` (see section 5.). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "05d6e39c-ff99-4354-96be-756062ab3941", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Static covariates before encoding:\n", + "static_covariates curve_type\n", + "component \n", + "smooth smooth\n", + "\n", + "Static covariates after encoding:\n", + "static_covariates curve_type\n", + "component \n", + "smooth 1.0\n" + ] + } + ], + "source": [ + "sine_series_st_cat = sine_series.with_static_covariates(\n", + " pd.DataFrame(data={\"curve_type\": [\"smooth\"]})\n", + ")\n", + "irregular_series_st_cat = irregular_series.with_static_covariates(\n", + " pd.DataFrame(data={\"curve_type\": [\"non_smooth\"]})\n", + ")\n", + "\n", + "train_series = [sine_series_st_cat, irregular_series_st_cat]\n", + "print(\"Static covariates before encoding:\")\n", + "print(train_series[0].static_covariates)\n", + "\n", + "# use StaticCovariatesTransformer to encode categorical static covariates into numeric data\n", + "scaler = StaticCovariatesTransformer()\n", + "train_series = scaler.fit_transform(train_series)\n", + "print(\"\\nStatic covariates after encoding:\")\n", + "print(train_series[0].static_covariates)" + ] + }, + { + "cell_type": "markdown", + "id": "638b7f2b-203d-44cc-a5b2-ef3b46c0d4e3", + "metadata": {}, + "source": [ + "Now all we need to do is tell `TFTModel` that `\"curve_type\"` is a categorical variable that requires embedding.\n", + "We can do so with model parameter `categorical_embedding_sizes` which is a dictionary of: {feature name: (number of categories, embedding size)}\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "dc7eb492-692d-41ec-958d-6bddbdaed937", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "8cac6c0f143d45bca19d71ce1afdc34a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Training: 0it [00:00, ?it/s]" + ] + }, + 
"metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3200df5b97714f6081ef94a257fc125a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Predicting: 4it [00:00, ?it/s]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "n_categories = 2 # \"smooth\" and \"non_smooth\"\n", + "embedding_size = 2 # embed the categorical variable into a numeric vector of size 2\n", + "categorical_embedding_sizes = {\"curve_type\": (n_categories, embedding_size)}\n", + "\n", + "model = TFTModel(\n", + " categorical_embedding_sizes=categorical_embedding_sizes, **get_model_params()\n", + ")\n", + "preds_st_cat = test_case(\n", + " model,\n", + " train_series,\n", + " predict_series=[series[:60] for series in train_series],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "5ebcfe69-b5e3-40a6-971d-ddbc48d032a9", + "metadata": {}, + "source": [ + "Nice, that seems to have worked as well! As a last step, let's look at how the models performed compared to each other." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "dd7784f1-a7b4-4f37-9898-66120a421a97", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Metric\n", + " no st bin st cat st\n", + "RMSE 0.133118 0.027695 0.047532\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Metric\n", + " no st bin st cat st\n", + "RMSE 0.274442 0.155583 0.131133\n" + ] + } + ], + "source": [ + "for series, ps_no_st, ps_st_bin, ps_st_cat in zip(\n", + " train_series, preds, preds_st_bin, preds_st_cat\n", + "):\n", + " series[-40:].plot(label=\"target\")\n", + " ps_no_st.quantile(0.5).plot(label=\"no static covs\")\n", + " ps_st_bin.quantile(0.5).plot(label=\"binary static covs\")\n", + " ps_st_cat.quantile(0.5).plot(label=\"categorical static covs\")\n", + " plt.show()\n", + " print(\"Metric\")\n", + " print(\n", + " pd.DataFrame(\n", + " {\n", + " name: [rmse(series, ps)]\n", + " for name, ps in zip(\n", + " [\"no st\", \"bin st\", \"cat st\"], [ps_no_st, ps_st_bin, ps_st_cat]\n", + " )\n", + " },\n", + " index=[\"RMSE\"],\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "b30c424f-42d1-428d-8424-48c2ca052c66", + "metadata": {}, + "source": [ + "These are great results! 
Both approaches using static covariates decreased the RMSE by more than half for both series compared to the baseline!\n", + "\n", + "*Note that we only used one static covariate feature, but you can use as many as you want including mixtures of data types (numeric and categorical).*" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "darts_39", + "language": "python", + "name": "darts_39" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 044bd2b28f591651d0ef957aa1b3da7a90dfc9f3 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Sun, 17 Jul 2022 15:08:49 +0200 Subject: [PATCH 11/16] TFTModel docstring update --- darts/models/forecasting/tft_model.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py index 9bab80f543..364ee12ce0 100644 --- a/darts/models/forecasting/tft_model.py +++ b/darts/models/forecasting/tft_model.py @@ -683,9 +683,12 @@ def __init__( prediction time). hidden_continuous_size : int Default for hidden size for processing continuous variables - categorical_embedding_sizes : int - Embedding size for categorical static covariates. Only effective if the target series contains - categorical (non-numeric) static covariates. + categorical_embedding_sizes : dict + A dictionary containing embedding sizes for categorical static covariates. The keys are the column names + of the categorical static covariates. The values are tuples of integers with + `(number of unique categories, embedding size)`. For example `{"some_column": (64, 8)}`. + Note that `TorchForecastingModels` can only handle numeric data. 
Consider transforming/encoding your data + with `darts.dataprocessing.transformers.static_covariates_transformer.StaticCovariatesTransformer`. add_relative_index : bool Whether to add positional values to future covariates. Defaults to ``False``. This allows to use the TFTModel without having to pass future_covariates to :fun:`fit()` and From 094d57f4bec3d0e3fe7ac2b2bfe570f91f28aec9 Mon Sep 17 00:00:00 2001 From: Dennis Bader Date: Mon, 18 Jul 2022 19:35:42 +0200 Subject: [PATCH 12/16] Apply suggestions from code review Co-authored-by: Julien Herzen --- .../transformers/static_covariates_transformer.py | 4 ++-- docs/userguide/covariates.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index 3216c2ba2c..23863fb326 100644 --- a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -56,11 +56,11 @@ def __init__( into integer valued arrays where each integer stands for a specific category. cols_num Optionally, a list of column names for which to apply the numeric transformer ``scaler_num``. - By default, the transformer will infer all numerical features and scale them with `scaler_num`. + By default, the transformer will infer all numerical features based on types, and scale them with `scaler_num`. If an empty list, no column will be scaled. cols_cat Optionally, a list of column names for which to apply the categorical transformer `scaler_cat`. - By default, the transformer will infer all categorical features and transform them with `scaler_cat`. + By default, the transformer will infer all categorical features based on types, and transform them with `scaler_cat`. If an empty list, no column will be transformed. name A specific name for the :class:`StaticCovariatesTransformer`. 
diff --git a/docs/userguide/covariates.md b/docs/userguide/covariates.md index 7b8dd5055e..7b1f73f507 100644 --- a/docs/userguide/covariates.md +++ b/docs/userguide/covariates.md @@ -99,7 +99,7 @@ Let's have a look at some examples of past, future, and static covariates: Temporal attributes are powerful because they are known in advance and can help models capture trends and / or seasonal patterns of the `target` series. -Static attributes are powerful when working with multiple `targets`. The time independent information can help models identify the nature/environment of the underlying series and improve forecasts across different `targets`. +Static attributes are powerful when working with multiple `targets` (either multiple `TimeSeries`, or multivariate series containing multiple dimensions each). The time independent information can help models identify the nature/environment of the underlying series and improve forecasts across different `targets`. In this guide we'll focus on past and future covariates. 
Here's a simple rule-of-thumb to know if your series are **past** or **future covariates**: From 8e10b86a332324c6ce644771ffd5aa6998232e8e Mon Sep 17 00:00:00 2001 From: dennisbader Date: Mon, 18 Jul 2022 21:38:40 +0200 Subject: [PATCH 13/16] applied suggestions from PR review --- .../static_covariates_transformer.py | 98 ++++++++----------- 1 file changed, 40 insertions(+), 58 deletions(-) diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index 23863fb326..fa7cf9a6a3 100644 --- a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd from scipy.sparse import csr_matrix -from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, OrdinalEncoder +from sklearn.preprocessing import MinMaxScaler, OrdinalEncoder from darts.logging import get_logger, raise_log from darts.timeseries import TimeSeries @@ -30,14 +30,16 @@ def __init__( n_jobs: int = 1, verbose: bool = False, ): - """Generic wrapper class for scalers/encoders/transformers of static covariates. + """Generic wrapper class for scalers/encoders/transformers of static covariates. This transformer acts + only on static covariates of the series passed to ``fit()``, ``transform()``, ``fit_transform()``, and + ``inverse_transform()``. It can both scale numerical features, as well as encode categorical features. - The underlying ``scaler_num`` and ``scaler_cat`` have to implement the ``fit()``, ``transform()`` + The underlying ``scaler_num`` and ``scaler_cat`` have to implement the ``fit()``, ``transform()``, and ``inverse_transform()`` methods (typically from scikit-learn). By default, numerical and categorical columns/features are inferred and allocated to ``scaler_num`` and - ``scaler_cat``, respectively. 
Alternatively, specify which columns to scale/transform with ``cols_num`` and - ``cols_cat``. + ``scaler_cat``, respectively. Alternatively, specify which columns to scale/transform with ``cols_num`` + and ``cols_cat``. Both ``scaler_num`` and ``scaler_cat`` are fit globally on static covariate data from all series passed to :class:`StaticCovariatesTransformer.fit()` @@ -56,12 +58,12 @@ def __init__( into integer valued arrays where each integer stands for a specific category. cols_num Optionally, a list of column names for which to apply the numeric transformer ``scaler_num``. - By default, the transformer will infer all numerical features based on types, and scale them with `scaler_num`. - If an empty list, no column will be scaled. + By default, the transformer will infer all numerical features based on types, and scale them with + `scaler_num`. If an empty list, no column will be scaled. cols_cat Optionally, a list of column names for which to apply the categorical transformer `scaler_cat`. - By default, the transformer will infer all categorical features based on types, and transform them with `scaler_cat`. - If an empty list, no column will be transformed. + By default, the transformer will infer all categorical features based on types, and transform them with + `scaler_cat`. If an empty list, no column will be transformed. name A specific name for the :class:`StaticCovariatesTransformer`. 
n_jobs @@ -141,14 +143,12 @@ def fit( data = pd.concat([s.static_covariates for s in series], axis=0) self.cols = data.columns - # get all numeric and categorical columns - mask_num = data.columns.isin(data.select_dtypes(include=np.number).columns) - mask_cat = ~mask_num - # infer numeric and categorical columns if user didn't supply them at transformer construction if self.cols_num is None: + mask_num = data.columns.isin(data.select_dtypes(include=np.number).columns) self.cols_num = data.columns[mask_num] if self.cols_cat is None: + mask_cat = data.columns.isin(data.select_dtypes(exclude=np.number).columns) self.cols_cat = data.columns[mask_cat] self.mask_num = data.columns.isin(self.cols_num) @@ -159,8 +159,15 @@ def fit( self.scaler_num.fit(data[:, self.mask_num]) if sum(self.mask_cat): self.scaler_cat.fit(data[:, self.mask_cat]) - if isinstance(self.scaler_cat, OneHotEncoder): - # OneHotEncoder will generate more features, create a 1-many column map for that + # check how many features the transformer generates + n_cat_out = self.scaler_cat.transform( + np.expand_dims(data[0, self.mask_cat], 0) + ).shape[-1] + if n_cat_out == sum(self.mask_cat): + # transformer generates same number of features -> make a 1-1 column map + self.col_map_cat = OrderedDict({col: [col] for col in self.cols_cat}) + else: + # transformer generates more features (i.e. 
OneHotEncoder) -> create a 1-many column map self.col_map_cat = OrderedDict( { col: [f"{col}_{cat}" for cat in categories] @@ -169,9 +176,6 @@ def fit( ) } ) - else: - # other encoders will have a 1-1 column map - self.col_map_cat = OrderedDict({col: [col] for col in self.cols_cat}) else: self.col_map_cat = {} return self @@ -182,6 +186,7 @@ def transform( kwargs = {key: val for key, val in kwargs.items()} kwargs["component_mask"] = (self.mask_num, self.mask_cat) kwargs["col_map_cat"] = self.col_map_cat + kwargs["method"] = "transform" return super().transform(series, *args, **kwargs) def inverse_transform( @@ -214,6 +219,7 @@ def inverse_transform( kwargs["col_map_cat"] = OrderedDict( {name: [col] for col, names in self.col_map_cat.items() for name in names} ) + kwargs["method"] = "inverse_transform" return super().inverse_transform(series, *args, **kwargs) @staticmethod @@ -227,16 +233,16 @@ def ts_transform(series: TimeSeries, *args, **kwargs) -> TimeSeries: transformer_num, transformer_cat = args component_mask_num, component_mask_cat = kwargs.get("component_mask") col_map_cat = kwargs.get("col_map_cat") + method = kwargs.get("method") # "transform" or "inverse_transform" vals_num, vals_cat = StaticCovariatesTransformer._reshape_in( series, component_mask=(component_mask_num, component_mask_cat) ) - tr_out_num, tr_out_cat = None, None if sum(component_mask_num): - tr_out_num = transformer_num.transform(vals_num) + tr_out_num = getattr(transformer_num, method)(vals_num) if sum(component_mask_cat): - tr_out_cat = transformer_cat.transform(vals_cat) + tr_out_cat = getattr(transformer_cat, method)(vals_cat) # sparse one hot encoding to dense array if isinstance(tr_out_cat, csr_matrix): @@ -253,27 +259,8 @@ def ts_transform(series: TimeSeries, *args, **kwargs) -> TimeSeries: @staticmethod def ts_inverse_transform(series: TimeSeries, *args, **kwargs) -> TimeSeries: - transformer_num, transformer_cat = args - component_mask_num, component_mask_cat = 
kwargs.get("component_mask") - col_map_cat = kwargs.get("col_map_cat") - - vals_num, vals_cat = StaticCovariatesTransformer._reshape_in( - series, component_mask=(component_mask_num, component_mask_cat) - ) - tr_out_num, tr_out_cat = None, None - if sum(component_mask_num): - tr_out_num = transformer_num.inverse_transform(vals_num) - if sum(component_mask_cat): - tr_out_cat = transformer_cat.inverse_transform(vals_cat) - - transformed_df = StaticCovariatesTransformer._reshape_out( - series, - (tr_out_num, tr_out_cat), - component_mask=(component_mask_num, component_mask_cat), - col_map_cat=col_map_cat, - ) - - return series.with_static_covariates(transformed_df) + # inverse transform will be called with kwarg method="inverse_transform" + return StaticCovariatesTransformer.ts_transform(series, *args, **kwargs) def _transform_iterator( self, series: Sequence[TimeSeries] @@ -301,7 +288,6 @@ def _reshape_in( component_mask: Optional[Tuple[np.ndarray, np.ndarray]] = None, ) -> Tuple[np.array, np.array]: # we expect component mask to be (numeric component mask, categorical component mask) - assert component_mask is not None component_mask_num, component_mask_cat = component_mask # returns tuple of (numeric static covariates, categorical static covariates) @@ -316,24 +302,22 @@ def _reshape_out( col_map_cat: Optional[Dict[str, str]] = None, ) -> pd.DataFrame: # we expect component mask to be (numeric component mask, categorical component mask) - assert component_mask is not None component_mask_num, component_mask_cat = component_mask - - # `col_map_cat` contains information to which features the original categorical feature were mapped - # (i.e. 
1-1 mapping for OrdinalEncoder, or 1-many mapping for OneHotEncoder) - assert col_map_cat is not None - vals_num, vals_cat = vals # infer the number of categorical output features - n_cat_cols = len( - np.unique([name for names in col_map_cat.values() for name in names]) - ) + # `col_map_cat` contains information to which features the original categorical feature were mapped + # (i.e. 1-1 mapping for OrdinalEncoder, or 1-many mapping for OneHotEncoder). + n_cat_cols = len({name for names in col_map_cat.values() for name in names}) # quick check if everything is in order - if vals_cat is None: - assert n_cat_cols == 0 - else: - assert n_cat_cols == vals_cat.shape[1] + n_vals_cat_cols = 0 if vals_cat is None else vals_cat.shape[1] + if n_vals_cat_cols != n_cat_cols: + raise_log( + ValueError( + f"Expected `{n_cat_cols}` categorical value columns but only encountered `{n_vals_cat_cols}`" + ), + logger, + ) data = {} idx_num, idx_cat = 0, 0 @@ -352,8 +336,6 @@ def _reshape_out( data[col_name] = vals_cat[:, idx_cat] static_cov_columns.append(col_name) idx_cat += 1 - else: - pass else: # is_num and is_cat are False -> feature is not part of transformer, use original values data[col] = series.static_covariates[col] static_cov_columns.append(col) From b9f5b3494f1a566f122ae6b6c073dbe864325333 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Mon, 18 Jul 2022 22:09:08 +0200 Subject: [PATCH 14/16] applied suggestions from PR review part 2 --- .ipynb_checkpoints/Untitled-checkpoint.ipynb | 6 ++ Untitled.ipynb | 33 ++++++++++ .../static_covariates_transformer.py | 66 ++++++++++--------- .../test_static_covariates_transformer.py | 36 +++++----- darts/timeseries.py | 4 +- docs/userguide/covariates.md | 5 ++ examples/15-static-covariates.ipynb | 10 +-- 7 files changed, 104 insertions(+), 56 deletions(-) create mode 100644 .ipynb_checkpoints/Untitled-checkpoint.ipynb create mode 100644 Untitled.ipynb diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb 
b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000000..363fcab7ed --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Untitled.ipynb b/Untitled.ipynb new file mode 100644 index 0000000000..9d969f0b3f --- /dev/null +++ b/Untitled.ipynb @@ -0,0 +1,33 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "c28921d2-5852-488d-b2b6-e1161a1a3373", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "darts_37", + "language": "python", + "name": "darts_37" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/darts/dataprocessing/transformers/static_covariates_transformer.py b/darts/dataprocessing/transformers/static_covariates_transformer.py index fa7cf9a6a3..acbe44cb7e 100644 --- a/darts/dataprocessing/transformers/static_covariates_transformer.py +++ b/darts/dataprocessing/transformers/static_covariates_transformer.py @@ -22,8 +22,8 @@ class StaticCovariatesTransformer(InvertibleDataTransformer, FittableDataTransformer): def __init__( self, - scaler_num=None, - scaler_cat=None, + transformer_num=None, + transformer_cat=None, cols_num: Optional[List[str]] = None, cols_cat: Optional[List[str]] = None, name="StaticCovariatesTransformer", @@ -34,36 +34,36 @@ def __init__( only on static covariates of the series passed to ``fit()``, ``transform()``, ``fit_transform()``, and ``inverse_transform()``. It can both scale numerical features, as well as encode categorical features. 
- The underlying ``scaler_num`` and ``scaler_cat`` have to implement the ``fit()``, ``transform()``, + The underlying ``transformer_num`` and ``transformer_cat`` have to implement the ``fit()``, ``transform()``, and ``inverse_transform()`` methods (typically from scikit-learn). - By default, numerical and categorical columns/features are inferred and allocated to ``scaler_num`` and - ``scaler_cat``, respectively. Alternatively, specify which columns to scale/transform with ``cols_num`` + By default, numerical and categorical columns/features are inferred and allocated to ``transformer_num`` and + ``transformer_cat``, respectively. Alternatively, specify which columns to scale/transform with ``cols_num`` and ``cols_cat``. - Both ``scaler_num`` and ``scaler_cat`` are fit globally on static covariate data from all series passed - to :class:`StaticCovariatesTransformer.fit()` + Both ``transformer_num`` and ``transformer_cat`` are fit globally on static covariate data from all series + passed to :class:`StaticCovariatesTransformer.fit()` Parameters ---------- - scaler_num - The scaler to transform numeric static covariate columns with. It must provide ``fit()``, + transformer_num + The transformer to transform numeric static covariate columns with. It must provide ``fit()``, ``transform()`` and ``inverse_transform()`` methods. Default: :class:`sklearn.preprocessing.MinMaxScaler(feature_range=(0, 1))`; this will scale all values between 0 and 1. - scaler_cat + transformer_cat The encoder to transform categorical static covariate columns with. It must provide ``fit()``, ``transform()`` and ``inverse_transform()`` methods. Default: :class:`sklearn.preprocessing.OrdinalEncoder()`; this will convert categories into integer valued arrays where each integer stands for a specific category. cols_num - Optionally, a list of column names for which to apply the numeric transformer ``scaler_num``. 
+ Optionally, a list of column names for which to apply the numeric transformer ``transformer_num``. By default, the transformer will infer all numerical features based on types, and scale them with - `scaler_num`. If an empty list, no column will be scaled. + `transformer_num`. If an empty list, no column will be scaled. cols_cat - Optionally, a list of column names for which to apply the categorical transformer `scaler_cat`. + Optionally, a list of column names for which to apply the categorical transformer `transformer_cat`. By default, the transformer will infer all categorical features based on types, and transform them with - `scaler_cat`. If an empty list, no column will be transformed. + `transformer_cat`. If an empty list, no column will be transformed. name A specific name for the :class:`StaticCovariatesTransformer`. n_jobs @@ -103,21 +103,25 @@ def __init__( comp3 0.5 1.0 """ super().__init__(name=name, n_jobs=n_jobs, verbose=verbose) - self.scaler_num = MinMaxScaler() if scaler_num is None else scaler_num - self.scaler_cat = OrdinalEncoder() if scaler_cat is None else scaler_cat + self.transformer_num = ( + MinMaxScaler() if transformer_num is None else transformer_num + ) + self.transformer_cat = ( + OrdinalEncoder() if transformer_cat is None else transformer_cat + ) - for scaler, scaler_name in zip( - [self.scaler_num, self.scaler_cat], - ["scaler_num", "scaler_cat"], + for transformer, transformer_name in zip( + [self.transformer_num, self.transformer_cat], + ["transformer_num", "transformer_cat"], ): if ( - not callable(getattr(scaler, "fit", None)) - or not callable(getattr(scaler, "transform", None)) - or not callable(getattr(scaler, "inverse_transform", None)) + not callable(getattr(transformer, "fit", None)) + or not callable(getattr(transformer, "transform", None)) + or not callable(getattr(transformer, "inverse_transform", None)) ): raise_log( ValueError( - f"The provided `{scaler_name}` object must have fit(), transform() and " + f"The 
provided `{transformer_name}` object must have fit(), transform() and " f"inverse_transform() methods" ), logger, @@ -156,11 +160,11 @@ def fit( data = data.to_numpy(copy=False) if sum(self.mask_num): - self.scaler_num.fit(data[:, self.mask_num]) + self.transformer_num.fit(data[:, self.mask_num]) if sum(self.mask_cat): - self.scaler_cat.fit(data[:, self.mask_cat]) + self.transformer_cat.fit(data[:, self.mask_cat]) # check how many features the transformer generates - n_cat_out = self.scaler_cat.transform( + n_cat_out = self.transformer_cat.transform( np.expand_dims(data[0, self.mask_cat], 0) ).shape[-1] if n_cat_out == sum(self.mask_cat): @@ -172,7 +176,7 @@ def fit( { col: [f"{col}_{cat}" for cat in categories] for col, categories in zip( - self.cols_cat, self.scaler_cat.categories_ + self.cols_cat, self.transformer_cat.categories_ ) } ) @@ -268,8 +272,8 @@ def _transform_iterator( # use numerical and categorical transformers for 'ts_transform()' return zip( series, - [self.scaler_num] * len(series), - [self.scaler_cat] * len(series), + [self.transformer_num] * len(series), + [self.transformer_cat] * len(series), ) def _inverse_transform_iterator( @@ -278,8 +282,8 @@ def _inverse_transform_iterator( # use numerical and categorical transformers for 'ts_inverse_transform()' return zip( series, - [self.scaler_num] * len(series), - [self.scaler_cat] * len(series), + [self.transformer_num] * len(series), + [self.transformer_cat] * len(series), ) @staticmethod diff --git a/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py b/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py index 36a62bd7b5..a8e00033a2 100644 --- a/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py +++ b/darts/tests/dataprocessing/transformers/test_static_covariates_transformer.py @@ -56,7 +56,7 @@ def test_scaling_single_series(self): ) for series in [self.series1, self.series2]: scaler = StaticCovariatesTransformer( - 
scaler_num=MinMaxScaler(feature_range=(-1, 1)) + transformer_num=MinMaxScaler(feature_range=(-1, 1)) ) self.helper_test_scaling(series, scaler, test_values) @@ -68,23 +68,23 @@ def test_scaling_single_series(self): ] ) for series in [self.series1, self.series2]: - scaler = StaticCovariatesTransformer(scaler_cat=OneHotEncoder()) + scaler = StaticCovariatesTransformer(transformer_cat=OneHotEncoder()) self.helper_test_scaling(series, scaler, test_values) def test_single_type_scaler(self): - scaler_cont = StaticCovariatesTransformer() + transformer_cont = StaticCovariatesTransformer() series_cont = self.series1.with_static_covariates( self.series1.static_covariates[["cont1", "cont2"]] ) test_cont = np.array([[0.0, 0.0], [0.5, 0.5], [1.0, 1.0]]) - self.helper_test_scaling(series_cont, scaler_cont, test_cont) + self.helper_test_scaling(series_cont, transformer_cont, test_cont) - scaler_cat = StaticCovariatesTransformer() + transformer_cat = StaticCovariatesTransformer() series_cat = self.series1.with_static_covariates( self.series1.static_covariates[["cat1", "cat2"]] ) test_cat = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]]) - self.helper_test_scaling(series_cat, scaler_cat, test_cat) + self.helper_test_scaling(series_cat, transformer_cat, test_cat) def test_selected_columns(self): test_cont = ( @@ -94,10 +94,10 @@ def test_selected_columns(self): .astype(dtype={1: "O", 3: "O"}) .values ) - scaler_cont2 = StaticCovariatesTransformer( + transformer_cont2 = StaticCovariatesTransformer( cols_num=["cont1", "cont2"], cols_cat=[] ) - self.helper_test_scaling(self.series1, scaler_cont2, test_cont) + self.helper_test_scaling(self.series1, transformer_cont2, test_cont) test_contcat = ( pd.DataFrame( @@ -106,16 +106,18 @@ def test_selected_columns(self): .astype(dtype={1: "O"}) .values ) - scaler_contcat = StaticCovariatesTransformer( + transformer_contcat = StaticCovariatesTransformer( cols_num=["cont2"], cols_cat=["cat2"] ) - self.helper_test_scaling(self.series1, 
scaler_contcat, test_contcat) + self.helper_test_scaling(self.series1, transformer_contcat, test_contcat) test_cat = pd.DataFrame( [[0.0, 0.0, 0.1, 0.0], [1.0, 1.0, 0.2, 1], [2.0, 2.0, 0.3, 2.0]], ).values - scaler_cat = StaticCovariatesTransformer(cols_num=[], cols_cat=["cat1", "cat2"]) - self.helper_test_scaling(self.series1, scaler_cat, test_cat) + transformer_cat = StaticCovariatesTransformer( + cols_num=[], cols_cat=["cat1", "cat2"] + ) + self.helper_test_scaling(self.series1, transformer_cat, test_cat) def test_custom_scaler(self): # invalid scaler with missing inverse_transform @@ -127,19 +129,19 @@ def transform(self): pass with pytest.raises(ValueError): - _ = StaticCovariatesTransformer(scaler_num=InvalidScaler()) + _ = StaticCovariatesTransformer(transformer_num=InvalidScaler()) with pytest.raises(ValueError): - _ = StaticCovariatesTransformer(scaler_cat=InvalidScaler()) + _ = StaticCovariatesTransformer(transformer_cat=InvalidScaler()) class ValidScaler(InvalidScaler): def inverse_transform(self): pass - _ = StaticCovariatesTransformer(scaler_num=ValidScaler()) - _ = StaticCovariatesTransformer(scaler_cat=ValidScaler()) + _ = StaticCovariatesTransformer(transformer_num=ValidScaler()) + _ = StaticCovariatesTransformer(transformer_cat=ValidScaler()) _ = StaticCovariatesTransformer( - scaler_num=ValidScaler(), scaler_cat=ValidScaler() + transformer_num=ValidScaler(), transformer_cat=ValidScaler() ) def test_scaling_multi_series(self): diff --git a/darts/timeseries.py b/darts/timeseries.py index c78028c4a6..9873367f3b 100644 --- a/darts/timeseries.py +++ b/darts/timeseries.py @@ -770,9 +770,7 @@ def from_group_dataframe( # store static covariate Series and group DataFrame (without static cov columns) splits.append( ( - pd.DataFrame([static_cov_vals], columns=static_cov_cols).astype( - {col: df[col].dtype for col in static_cov_cols} - ), + pd.DataFrame([static_cov_vals], columns=static_cov_cols), group.drop(columns=static_cov_cols), ) ) diff --git 
a/docs/userguide/covariates.md b/docs/userguide/covariates.md index 7b1f73f507..cbee309ce8 100644 --- a/docs/userguide/covariates.md +++ b/docs/userguide/covariates.md @@ -125,17 +125,22 @@ Model | Past Covariates | Future Covariates | Static Covariates --- | :---: | :---: | :---: **Local Forecasting Models (LFMs)** | | | `ExponentialSmoothing` | | | +`BATS` and `TBATS` | | | `Theta` and `FourTheta` | | | `FFT` | | | +`Croston method`| | | `ARIMA` | | ✅ | `VARIMA` | | ✅ | `AutoARIMA` | | ✅ | +`StatsForecastAutoARIMA` | | ✅ | +`KalmanForecaster` | | ✅ | `Prophet` | | ✅ | **Global Forecasting Models (GFMs)** | | | `RegressionModel`* | ✅ | ✅ | `RNNModel`** | | ✅ | `BlockRNNModel`*** | ✅ | | `NBEATSModel` | ✅ | | +`NHiTSModel` | ✅ | | `TCNModel` | ✅ | | `TransformerModel` | ✅ | | `TFTModel` | ✅ | ✅ | ✅ diff --git a/examples/15-static-covariates.ipynb b/examples/15-static-covariates.ipynb index 724fc1192d..c658df5ee4 100644 --- a/examples/15-static-covariates.ipynb +++ b/examples/15-static-covariates.ipynb @@ -364,11 +364,11 @@ "id": "71c466ae-b136-4307-a9b5-550554ffc467", "metadata": {}, "source": [ - "### 5. Scaling/Encoding static covariate data\n", + "### 5. Scaling/Encoding/Transforming static covariate data\n", "There might be the need to scale numeric static covariates or encode categorical static covariates as not all models can handle non numeric static covariates. \n", "\n", "Use `StaticCovariatesTransformer` (see the docs [here](https://unit8co.github.io/darts/generated_api/darts.dataprocessing.transformers.static_covariates_transformer.html#staticcovariatestransformer)) to scale/transform static covariates. 
By default it uses a `MinMaxScaler` to scale numeric data, and a `OrdinalEncoder` to encode categorical data.\n", - "Both the numeric scaler and categorical encoder will be fit globally on static covariate data of all time series passed to `StaticCovariatesTransformer.fit()`" + "Both the numeric and categorical transformers will be fit globally on static covariate data of all time series passed to `StaticCovariatesTransformer.fit()`" ] }, { @@ -975,9 +975,9 @@ ], "metadata": { "kernelspec": { - "display_name": "darts_39", + "display_name": "darts_37", "language": "python", - "name": "darts_39" + "name": "darts_37" }, "language_info": { "codemirror_mode": { @@ -989,7 +989,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.12" + "version": "3.7.12" } }, "nbformat": 4, From ba82e6d5337eb805f4ab26251626f3a682dfc369 Mon Sep 17 00:00:00 2001 From: dennisbader Date: Tue, 19 Jul 2022 21:13:09 +0200 Subject: [PATCH 15/16] added automatic embedding size option for TFTModel --- darts/models/forecasting/block_rnn_model.py | 6 +- darts/models/forecasting/nbeats.py | 6 +- darts/models/forecasting/nhits.py | 6 +- darts/models/forecasting/rnn_model.py | 6 +- darts/models/forecasting/tcn_model.py | 6 +- darts/models/forecasting/tft_model.py | 70 +++++++++++-------- darts/models/forecasting/transformer_model.py | 12 ++-- 7 files changed, 62 insertions(+), 50 deletions(-) diff --git a/darts/models/forecasting/block_rnn_model.py b/darts/models/forecasting/block_rnn_model.py index 3dd8e9d606..736a6a2a2b 100644 --- a/darts/models/forecasting/block_rnn_model.py +++ b/darts/models/forecasting/block_rnn_model.py @@ -178,12 +178,12 @@ def __init__( PyTorch loss function used for training. This parameter will be ignored for probabilistic models if the ``likelihood`` parameter is specified. Default: ``torch.nn.MSELoss()``. - torch_metrics - A torch metric or a ``MetricCollection`` used for evaluation. 
A full list of available metrics can be found - at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. likelihood One of Darts' :meth:`Likelihood ` models to be used for probabilistic forecasts. Default: ``None``. + torch_metrics + A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found + at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. optimizer_cls The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``. optimizer_kwargs diff --git a/darts/models/forecasting/nbeats.py b/darts/models/forecasting/nbeats.py index 189ec9c93b..8616cea110 100644 --- a/darts/models/forecasting/nbeats.py +++ b/darts/models/forecasting/nbeats.py @@ -601,12 +601,12 @@ def __init__( PyTorch loss function used for training. This parameter will be ignored for probabilistic models if the ``likelihood`` parameter is specified. Default: ``torch.nn.MSELoss()``. - torch_metrics - A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found - at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. likelihood One of Darts' :meth:`Likelihood ` models to be used for probabilistic forecasts. Default: ``None``. + torch_metrics + A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found + at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. optimizer_cls The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``. optimizer_kwargs diff --git a/darts/models/forecasting/nhits.py b/darts/models/forecasting/nhits.py index 7bd06b7a37..9f5efb0be2 100644 --- a/darts/models/forecasting/nhits.py +++ b/darts/models/forecasting/nhits.py @@ -537,12 +537,12 @@ def __init__( PyTorch loss function used for training. This parameter will be ignored for probabilistic models if the ``likelihood`` parameter is specified. Default: ``torch.nn.MSELoss()``. 
- torch_metrics - A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found - at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. likelihood One of Darts' :meth:`Likelihood ` models to be used for probabilistic forecasts. Default: ``None``. + torch_metrics + A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found + at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. optimizer_cls The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``. optimizer_kwargs diff --git a/darts/models/forecasting/rnn_model.py b/darts/models/forecasting/rnn_model.py index 2f709be84a..ddfba48c65 100644 --- a/darts/models/forecasting/rnn_model.py +++ b/darts/models/forecasting/rnn_model.py @@ -257,12 +257,12 @@ def __init__( PyTorch loss function used for training. This parameter will be ignored for probabilistic models if the ``likelihood`` parameter is specified. Default: ``torch.nn.MSELoss()``. - torch_metrics - A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found - at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. likelihood One of Darts' :meth:`Likelihood ` models to be used for probabilistic forecasts. Default: ``None``. + torch_metrics + A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found + at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. optimizer_cls The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``. optimizer_kwargs diff --git a/darts/models/forecasting/tcn_model.py b/darts/models/forecasting/tcn_model.py index 008805e146..d1d39c111a 100644 --- a/darts/models/forecasting/tcn_model.py +++ b/darts/models/forecasting/tcn_model.py @@ -300,12 +300,12 @@ def __init__( PyTorch loss function used for training. 
This parameter will be ignored for probabilistic models if the ``likelihood`` parameter is specified. Default: ``torch.nn.MSELoss()``. - torch_metrics - A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found - at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. likelihood One of Darts' :meth:`Likelihood ` models to be used for probabilistic forecasts. Default: ``None``. + torch_metrics + A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found + at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. optimizer_cls The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``. optimizer_kwargs diff --git a/darts/models/forecasting/tft_model.py b/darts/models/forecasting/tft_model.py index 364ee12ce0..c6a807b0bb 100644 --- a/darts/models/forecasting/tft_model.py +++ b/darts/models/forecasting/tft_model.py @@ -22,6 +22,7 @@ _InterpretableMultiHeadAttention, _MultiEmbedding, _VariableSelectionNetwork, + get_embedding_size, ) from darts.models.forecasting.torch_forecasting_model import MixedCovariatesTorchModel from darts.utils.data import ( @@ -630,7 +631,9 @@ def __init__( feed_forward: str = "GatedResidualNetwork", dropout: float = 0.1, hidden_continuous_size: int = 8, - categorical_embedding_sizes: Optional[Dict[str, Tuple[int, int]]] = None, + categorical_embedding_sizes: Optional[ + Dict[str, Union[int, Tuple[int, int]]] + ] = None, add_relative_index: bool = False, loss_fn: Optional[nn.Module] = None, likelihood: Optional[Likelihood] = None, @@ -656,51 +659,50 @@ def __init__( Parameters ---------- - input_chunk_length : int + input_chunk_length Encoder length; number of past time steps that are fed to the forecasting module at prediction time. - output_chunk_length : int + output_chunk_length Decoder length; number of future time steps that are fed to the forecasting module at prediction time. 
- hidden_size : int + hidden_size Hidden state size of the TFT. It is the main hyper-parameter and common across the internal TFT architecture. - lstm_layers : int + lstm_layers Number of layers for the Long Short Term Memory (LSTM) Encoder and Decoder (1 is a good default). - num_attention_heads : int + num_attention_heads Number of attention heads (4 is a good default) - full_attention : bool + full_attention If ``True``, applies multi-head attention query on past (encoder) and future (decoder) parts. Otherwise, only queries on future part. Defaults to ``False``. - feed_forward : str + feed_forward A feedforward network is a fully-connected layer with an activation. TFT Can be one of the glu variant's FeedForward Network (FFN)[2]. The glu variant's FeedForward Network are a series of FFNs designed to work - better with Transformer based models. Defaults to ``"GatedResidualNetwork"``. - ["GLU", "Bilinear", "ReGLU", "GEGLU", "SwiGLU", "ReLU", "GELU"] - or the TFT original FeedForward Network. - ["GatedResidualNetwork"] - dropout : float + better with Transformer based models. Defaults to ``"GatedResidualNetwork"``. ["GLU", "Bilinear", "ReGLU", + "GEGLU", "SwiGLU", "ReLU", "GELU"] or the TFT original FeedForward Network ["GatedResidualNetwork"]. + dropout Fraction of neurons affected by dropout. This is compatible with Monte Carlo dropout at inference time for model uncertainty estimation (enabled with ``mc_dropout=True`` at prediction time). - hidden_continuous_size : int + hidden_continuous_size Default for hidden size for processing continuous variables - categorical_embedding_sizes : dict - A dictionary containing embedding sizes for categorical static covariates. The keys are the column names - of the categorical static covariates. The values are tuples of integers with - `(number of unique categories, embedding size)`. For example `{"some_column": (64, 8)}`. - Note that `TorchForecastingModels` can only handle numeric data. 
Consider transforming/encoding your data + categorical_embedding_sizes + A dictionary used to construct embeddings for categorical static covariates. The keys are the column names + of the categorical static covariates. Each value is either a single integer or a tuple of integers. + For a single integer, give the number of unique categories (n) of the corresponding variable. For example + ``{"some_column": 64}``. The embedding size will be automatically determined by + ``min(round(1.6 * n**0.56), 100)``. + For a tuple of integers, give (number of unique categories, embedding size). For example + ``{"some_column": (64, 8)}``. + Note that ``TorchForecastingModels`` only support numeric data. Consider transforming/encoding your data with `darts.dataprocessing.transformers.static_covariates_transformer.StaticCovariatesTransformer`. - add_relative_index : bool + add_relative_index Whether to add positional values to future covariates. Defaults to ``False``. - This allows to use the TFTModel without having to pass future_covariates to :fun:`fit()` and + This allows using the TFTModel without having to pass future_covariates to :func:`fit()` and :func:`train()`. It gives a value to the position of each step from input and output chunk relative to the prediction point. The values are normalized with ``input_chunk_length``. - loss_fn : nn.Module + loss_fn PyTorch loss function used for training. By default, the TFT model is probabilistic and uses a ``likelihood`` instead (``QuantileRegression``). To make the model deterministic, you can set the ` `likelihood`` to None and give a ``loss_fn`` argument. - torch_metrics - A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found - at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. likelihood The likelihood model to be used for probabilistic forecasts. By default, the TFT uses a ``QuantileRegression`` likelihood.
@@ -708,6 +710,9 @@ def __init__( Optional arguments to initialize the pytorch_lightning.Module, pytorch_lightning.Trainer, and Darts' :class:`TorchForecastingModel`. + torch_metrics + A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found + at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. optimizer_cls The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``. optimizer_kwargs @@ -990,11 +995,20 @@ def _create_model(self, train_sample: MixedCovariatesTrainTensorType) -> nn.Modu static_input_numeric.append(static_var) reals_input.append(static_var) else: + # get embedding sizes for each categorical variable + embedding = self.categorical_embedding_sizes[col_name] + raise_if_not( + isinstance(embedding, (int, tuple)), + "Dict values of `categorical_embedding_sizes` must either be integers or tuples. Read " + "the TFTModel documentation for more information.", + logger, + ) + if isinstance(embedding, int): + embedding = (embedding, get_embedding_size(n=embedding)) + categorical_embedding_sizes[vars_meta[idx]] = embedding + static_input_categorical.append(static_var) categorical_input.append(static_var) - categorical_embedding_sizes[ - vars_meta[idx] - ] = self.categorical_embedding_sizes[col_name] variables_meta["model_config"]["reals_input"] = list(dict.fromkeys(reals_input)) variables_meta["model_config"]["categorical_input"] = list( diff --git a/darts/models/forecasting/transformer_model.py b/darts/models/forecasting/transformer_model.py index e33768b2e2..203ed9d7bc 100644 --- a/darts/models/forecasting/transformer_model.py +++ b/darts/models/forecasting/transformer_model.py @@ -257,10 +257,8 @@ def __init__( The activation function of encoder/decoder intermediate layer, (default='relu'). can be one of the glu variant's FeedForward Network (FFN)[2]. A feedforward network is a fully-connected layer with an activation. 
The glu variant's FeedForward Network are a series - of FFNs designed to work better with Transformer based models. - ["GLU", "Bilinear", "ReGLU", "GEGLU", "SwiGLU", "ReLU", "GELU"] - or one the pytorch internal activations - ["relu", "gelu"] + of FFNs designed to work better with Transformer based models. ["GLU", "Bilinear", "ReGLU", "GEGLU", + "SwiGLU", "ReLU", "GELU"] or one of the pytorch internal activations ["relu", "gelu"] custom_encoder A custom user-provided encoder module for the transformer (default=None). custom_decoder @@ -273,12 +271,12 @@ def __init__( PyTorch loss function used for training. This parameter will be ignored for probabilistic models if the ``likelihood`` parameter is specified. Default: ``torch.nn.MSELoss()``. - torch_metrics - A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found - at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. likelihood One of Darts' :meth:`Likelihood ` models to be used for probabilistic forecasts. Default: ``None``. + torch_metrics + A torch metric or a ``MetricCollection`` used for evaluation. A full list of available metrics can be found + at https://torchmetrics.readthedocs.io/en/latest/. Default: ``None``. optimizer_cls The PyTorch optimizer class to be used. Default: ``torch.optim.Adam``.
optimizer_kwargs From 81be52615699cbf18b74af484416329daa5dd4aa Mon Sep 17 00:00:00 2001 From: dennisbader Date: Wed, 20 Jul 2022 20:37:29 +0200 Subject: [PATCH 16/16] added test for TFTModel categorical static covariate support --- darts/tests/models/forecasting/test_TFT.py | 32 ++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/darts/tests/models/forecasting/test_TFT.py b/darts/tests/models/forecasting/test_TFT.py index 9f378dc7d1..b66f818d7f 100644 --- a/darts/tests/models/forecasting/test_TFT.py +++ b/darts/tests/models/forecasting/test_TFT.py @@ -14,6 +14,7 @@ from torch.nn import MSELoss from darts.models.forecasting.tft_model import TFTModel + from darts.models.forecasting.tft_submodels import get_embedding_size from darts.utils.likelihood_models import QuantileRegression TORCH_AVAILABLE = True @@ -171,22 +172,49 @@ def test_static_covariates_support(self): ) target_multi = target_multi.with_static_covariates( - pd.DataFrame([[0.0, 1.0], [2.0, 3.0]], index=["st1", "st2"]) + pd.DataFrame( + [[0.0, 1.0, 0, 2], [2.0, 3.0, 1, 3]], + columns=["st1", "st2", "cat1", "cat2"], + ) ) # should work with cyclic encoding for time index + # set categorical embedding sizes once with automatic embedding size with an `int` and once by + # manually setting it with `tuple(int, int)` model = TFTModel( input_chunk_length=3, output_chunk_length=4, add_encoders={"cyclic": {"future": "hour"}}, + categorical_embedding_sizes={"cat1": 2, "cat2": (2, 2)}, pl_trainer_kwargs={"fast_dev_run": True}, ) model.fit(target_multi, verbose=False) + assert len(model.model.static_variables) == len( target_multi.static_covariates.columns ) - model.predict(n=1, series=target_multi, verbose=False) + # check model embeddings + target_embedding = { + "static_covariate_2": ( + 2, + get_embedding_size(2), + ), # automatic embedding size + "static_covariate_3": (2, 2), # manual embedding size + } + assert model.categorical_embedding_sizes == target_embedding + for cat_var, 
embedding_dims in target_embedding.items(): + assert ( + model.model.input_embeddings.embeddings[cat_var].num_embeddings + == embedding_dims[0] + ) + assert ( + model.model.input_embeddings.embeddings[cat_var].embedding_dim + == embedding_dims[1] + ) + + preds = model.predict(n=1, series=target_multi, verbose=False) + assert preds.static_covariates.equals(target_multi.static_covariates) # raise an error when trained with static covariates of wrong dimensionality target_multi = target_multi.with_static_covariates(