Merge branch 'fix/unit8co#1101' of https://github.com/rijkvandermeulen/darts into fix/unit8co#1101

Rijk van der Meulen · Rijk van der Meulen · commit c9914e30bfa1 · 2022-08-08T17:00:53.000+02:00
diff --git a/darts/dataprocessing/dtw/dtw.py b/darts/dataprocessing/dtw/dtw.py
@@ -2,9 +2,12 @@
 from typing import Callable, Union
 
 import numpy as np
+import pandas as pd
+import xarray as xr
 
 from darts import TimeSeries
 from darts.logging import get_logger, raise_if, raise_if_not
+from darts.timeseries import DIMS
 
 from .cost_matrix import CostMatrix
 from .window import CRWindow, NoWindow, Window
@@ -203,25 +206,34 @@ def warped(self) -> (TimeSeries, TimeSeries):
             Two new TimeSeries instances of the same length, indexed by pd.RangeIndex.
         """
 
-        series1 = self.series1
-        series2 = self.series2
-
-        xa1 = series1.data_array(copy=False)
-        xa2 = series2.data_array(copy=False)
-
+        xa1 = self.series1.data_array(copy=False)
+        xa2 = self.series2.data_array(copy=False)
         path = self.path()
 
-        warped_series1 = xa1[path[:, 0]]
-        warped_series2 = xa2[path[:, 1]]
-
-        time_dim1 = series1._time_dim
-        time_dim2 = series2._time_dim
+        values1, values2 = xa1.values[path[:, 0]], xa2.values[path[:, 1]]
+
+        # We set a RangeIndex for both series:
+        warped_series1 = xr.DataArray(
+            data=values1,
+            dims=xa1.dims,
+            coords={
+                self.series1._time_dim: pd.RangeIndex(values1.shape[0]),
+                DIMS[1]: xa1.coords[DIMS[1]],
+            },
+            attrs=xa1.attrs,
+        )
 
-        range_index = True
+        warped_series2 = xr.DataArray(
+            data=values2,
+            dims=xa2.dims,
+            coords={
+                self.series2._time_dim: pd.RangeIndex(values2.shape[0]),
+                DIMS[1]: xa2.coords[DIMS[1]],
+            },
+            attrs=xa2.attrs,
+        )
 
-        if range_index:
-            warped_series1 = warped_series1.reset_index(dims_or_levels=time_dim1)
-            warped_series2 = warped_series2.reset_index(dims_or_levels=time_dim2)
+        time_dim1, time_dim2 = self.series1._time_dim, self.series2._time_dim
 
         # todo: prevent time information being lost after warping
         # Applying time index from series1 to series2 (take_dates = True) is disabled for consistency reasons
diff --git a/darts/models/components/transformer.py b/darts/models/components/transformer.py
@@ -0,0 +1,56 @@
+import torch
+import torch.nn as nn
+
+from darts.utils.torch import MonteCarloDropout
+
+
+class CustomFeedForwardEncoderLayer(nn.TransformerEncoderLayer):
+    """Overwrites the PyTorch TransformerEncoderLayer to use Darts' Position-wise Feed-Forward variants."""
+
+    def __init__(self, ffn: nn.Module, dropout: float, *args, **kwargs):
+        """
+        Parameters
+        ----------
+        ffn
+            One of Darts' Position-wise Feed-Forward Network variants from darts.models.components.glu_variants
+        dropout
+            Fraction of neurons affected by Dropout (no default; must be given).
+        args
+            positional arguments from torch.nn.TransformerEncoderLayer.
+        kwargs
+            keyword arguments from torch.nn.TransformerEncoderLayer. `activation` will have no effect.
+        """
+        super().__init__(*args, **kwargs)
+        self.ffn = ffn
+        self.dropout = MonteCarloDropout(dropout)
+
+    # overwrite the feed forward block
+    def _ff_block(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ffn(x)
+        return self.dropout(x)
+
+
+class CustomFeedForwardDecoderLayer(nn.TransformerDecoderLayer):
+    """Overwrites the PyTorch TransformerDecoderLayer to use Darts' custom Position Wise Feed Forward Layers."""
+
+    def __init__(self, ffn: nn.Module, dropout: float, *args, **kwargs):
+        """
+        Parameters
+        ----------
+        ffn
+            One of Darts' Position-wise Feed-Forward Network variants from darts.models.components.glu_variants
+        dropout
+            Fraction of neurons affected by Dropout (no default; must be given).
+        args
+            positional arguments from torch.nn.TransformerDecoderLayer.
+        kwargs
+            keyword arguments from torch.nn.TransformerDecoderLayer. `activation` will have no effect.
+        """
+        super().__init__(*args, **kwargs)
+        self.ffn = ffn
+        self.dropout = MonteCarloDropout(dropout)
+
+    # overwrite the feed forward block
+    def _ff_block(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.ffn(x)
+        return self.dropout(x)
diff --git a/darts/models/forecasting/torch_forecasting_model.py b/darts/models/forecasting/torch_forecasting_model.py
@@ -1355,7 +1355,7 @@ def load_model(path: str) -> "TorchForecastingModel":
         path_ptl_ckpt = base_path + "_ptl-ckpt.pth.tar"
         if os.path.exists(path_ptl_ckpt):
             model.model = model.model.__class__.load_from_checkpoint(path_ptl_ckpt)
-            model.trainer = model.model.trainer
+            model.trainer = None
 
         return model
 
diff --git a/darts/models/forecasting/transformer_model.py b/darts/models/forecasting/transformer_model.py
@@ -9,9 +9,13 @@
 import torch
 import torch.nn as nn
 
-from darts.logging import get_logger, raise_if_not
+from darts.logging import get_logger, raise_if, raise_if_not
 from darts.models.components import glu_variants
 from darts.models.components.glu_variants import GLU_FFN
+from darts.models.components.transformer import (
+    CustomFeedForwardDecoderLayer,
+    CustomFeedForwardEncoderLayer,
+)
 from darts.models.forecasting.pl_forecasting_module import PLPastCovariatesModule
 from darts.models.forecasting.torch_forecasting_model import PastCovariatesTorchModel
 
@@ -22,6 +26,34 @@
 FFN = GLU_FFN + BUILT_IN
 
 
+def _generate_coder(
+    d_model, dim_ff, dropout, nhead, num_layers, coder_cls, layer_cls, ffn_cls
+):
+    """Generates an Encoder or Decoder with one of Darts' Feed-forward Network variants.
+    Parameters
+    ----------
+    coder_cls
+        Either `torch.nn.TransformerEncoder` or `...TransformerDecoder`
+    layer_cls
+        Either `darts.models.components.transformer.CustomFeedForwardEncoderLayer` or
+        `...CustomFeedForwardDecoderLayer`
+    ffn_cls
+        One of Darts' Position-wise Feed-Forward Network variants from `darts.models.components.glu_variants`
+    """
+    layer = layer_cls(
+        ffn=ffn_cls(d_model=d_model, d_ff=dim_ff, dropout=dropout),
+        dropout=dropout,
+        d_model=d_model,
+        nhead=nhead,
+        dim_feedforward=dim_ff,
+    )
+    return coder_cls(
+        layer,
+        num_layers=num_layers,
+        norm=nn.LayerNorm(d_model),
+    )
+
+
 # This implementation of positional encoding is taken from the PyTorch documentation:
 # https://pytorch.org/tutorials/beginner/transformer_tutorial.html
 class _PositionalEncoding(nn.Module):
@@ -142,13 +174,39 @@ def __init__(
 
         raise_if_not(activation in FFN, f"'{activation}' is not in {FFN}")
         if activation in GLU_FFN:
-            # use glu variant feedforward layers
-            self.activation = getattr(glu_variants, activation)(
-                d_model=d_model, d_ff=dim_feedforward, dropout=dropout
+            raise_if(
+                custom_encoder is not None or custom_decoder is not None,
+                "Cannot use `custom_encoder` or `custom_decoder` along with an `activation` from "
+                f"{GLU_FFN}",
+                logger=logger,
+            )
+            # use glu variant feed-forward layers
+            ffn_cls = getattr(glu_variants, activation)
+
+            # custom feed-forward layers have activation built-in. reset activation
+            activation = None
+
+            custom_encoder = _generate_coder(
+                d_model,
+                dim_feedforward,
+                dropout,
+                nhead,
+                num_encoder_layers,
+                nn.TransformerEncoder,
+                CustomFeedForwardEncoderLayer,
+                ffn_cls,
+            )
+
+            custom_decoder = _generate_coder(
+                d_model,
+                dim_feedforward,
+                dropout,
+                nhead,
+                num_decoder_layers,
+                nn.TransformerDecoder,
+                CustomFeedForwardDecoderLayer,
+                ffn_cls,
             )
-        else:
-            # use nn.Transformer built in feedforward layers
-            self.activation = activation
 
         # Defining the Transformer module
         self.transformer = nn.Transformer(
@@ -158,7 +216,7 @@ def __init__(
             num_decoder_layers=num_decoder_layers,
             dim_feedforward=dim_feedforward,
             dropout=dropout,
-            activation=self.activation,
+            activation=activation,
             custom_encoder=custom_encoder,
             custom_decoder=custom_decoder,
         )
diff --git a/darts/tests/models/forecasting/test_transformer_model.py b/darts/tests/models/forecasting/test_transformer_model.py
@@ -11,6 +11,12 @@
 logger = get_logger(__name__)
 
 try:
+    import torch.nn as nn
+
+    from darts.models.components.transformer import (
+        CustomFeedForwardDecoderLayer,
+        CustomFeedForwardEncoderLayer,
+    )
     from darts.models.forecasting.transformer_model import (
         TransformerModel,
         _TransformerModule,
@@ -118,14 +124,28 @@ def test_activations(self):
                 )
                 model1.fit(self.series, epochs=1)
 
-            # internal activation function
+            # internal activation function uses PyTorch TransformerEncoderLayer
             model2 = TransformerModel(
                 input_chunk_length=1, output_chunk_length=1, activation="gelu"
             )
             model2.fit(self.series, epochs=1)
+            assert isinstance(
+                model2.model.transformer.encoder.layers[0], nn.TransformerEncoderLayer
+            )
+            assert isinstance(
+                model2.model.transformer.decoder.layers[0], nn.TransformerDecoderLayer
+            )
 
-            # glue variant FFN
+            # GLU variant FFN uses our CustomFeedForward{Encoder,Decoder}Layer
             model3 = TransformerModel(
                 input_chunk_length=1, output_chunk_length=1, activation="SwiGLU"
             )
             model3.fit(self.series, epochs=1)
+            assert isinstance(
+                model3.model.transformer.encoder.layers[0],
+                CustomFeedForwardEncoderLayer,
+            )
+            assert isinstance(
+                model3.model.transformer.decoder.layers[0],
+                CustomFeedForwardDecoderLayer,
+            )
diff --git a/darts/tests/test_timeseries.py b/darts/tests/test_timeseries.py
@@ -105,6 +105,31 @@ def test_integer_indexing(self):
             list(indexed_ts.time_index) == list(pd.RangeIndex(2, 7, step=1))
         )
 
+    def test_univariate_component(self):
+        series = TimeSeries.from_values(np.array([10, 20, 30])).with_columns_renamed(
+            "0", "component"
+        )
+        mseries = concatenate([series] * 3, axis="component")
+        mseries = mseries.with_hierarchy(
+            {"component_1": ["component"], "component_2": ["component"]}
+        )
+
+        static_cov = pd.DataFrame(
+            {"dim0": [1, 2, 3], "dim1": [-2, -1, 0], "dim2": [0.0, 0.1, 0.2]}
+        )
+
+        mseries = mseries.with_static_covariates(static_cov)
+
+        for univ_series in [
+            mseries.univariate_component(1),
+            mseries.univariate_component("component_1"),
+        ]:
+            # hierarchy should be dropped
+            self.assertIsNone(univ_series.hierarchy)
+
+            # only component 1's static covariates (2 - 1 + 0.1) should be retained
+            self.assertEqual(univ_series.static_covariates.sum().sum(), 1.1)
+
     def test_column_names(self):
         # test the column names resolution
         columns_before = [
diff --git a/darts/timeseries.py b/darts/timeseries.py
@@ -649,7 +649,7 @@ def from_dataframe(
         else:
             raise_if_not(
                 isinstance(df.index, VALID_INDEX_TYPES),
-                "If time_col is not specified, the DataFrame must be indexed either with"
+                "If time_col is not specified, the DataFrame must be indexed either with "
                 "a DatetimeIndex, or with a RangeIndex.",
                 logger,
             )
@@ -2702,6 +2702,9 @@ def univariate_component(self, index: Union[str, int]) -> "TimeSeries":
         Retrieve one of the components of the series
         and return it as new univariate ``TimeSeries`` instance.
 
+        This drops the hierarchy (if any), and retains only the static covariates
+        of the selected component.
+
         Parameters
         ----------
         index
@@ -2713,11 +2716,8 @@ def univariate_component(self, index: Union[str, int]) -> "TimeSeries":
         TimeSeries
             A new univariate TimeSeries instance.
         """
-        if isinstance(index, int):
-            new_xa = self._xa.isel(component=index).expand_dims(DIMS[1], axis=1)
-        else:
-            new_xa = self._xa.sel(component=index).expand_dims(DIMS[1], axis=1)
-        return self.__class__(new_xa)
+
+        return self[index if isinstance(index, str) else self.components[index]]
 
     def add_datetime_attribute(
         self, attribute, one_hot: bool = False, cyclic: bool = False
diff --git a/docs/userguide/timeseries.md b/docs/userguide/timeseries.md
@@ -33,7 +33,7 @@ In addition, some models can work on *multiple time series*, meaning that they c
 
 * **Example of a multivariate series:** The blood pressure and heart rate of a single patient over time (one multivariate series with 2 components).
 
-* **Example of multiple series:** The blood pressure and heart rate of multiple patients; potentially measured at different times for different patients (one univariate series per patient).
+* **Example of multiple series:** The blood pressure and heart rate of multiple patients; potentially measured at different times for different patients (one multivariate series with 2 components per patient).
 
 
 ### Should I use a multivariate series or multiple series for my problem?
@@ -50,9 +50,9 @@ In Darts, probabilistic forecasts are represented by drawing Monte Carlo samples
 ## Creating `TimeSeries`
 `TimeSeries` objects can be created using factory methods, for example:
 
-* [TimeSeries.from_dataframe()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_dataframe) can create `TimeSeries` from a Pandas Dataframe having one or several columns representing values (several columns would correspond to a multivariate series).
+* [TimeSeries.from_dataframe()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_dataframe) can create `TimeSeries` from a Pandas Dataframe having one or several columns representing values (columns correspond to components, and several columns would correspond to a multivariate series).
 
-* [TimeSeries.from_values()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_values) can create `TimeSeries` from a 2-D or 3-D NumPy array. It will generate an integer-based time index (of type `pandas.RangeIndex`). 2-D corresponds to deterministic (potentially multivariate) series, and 3-D to stochastic series.
+* [TimeSeries.from_values()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_values) can create `TimeSeries` from a 1-D, 2-D or 3-D NumPy array. It will generate an integer-based time index (of type `pandas.RangeIndex`). 1-D corresponds to univariate deterministic series, 2-D to (potentially multivariate) deterministic series, and 3-D to (potentially multivariate) stochastic series.
 
 * [TimeSeries.from_times_and_values()](https://unit8co.github.io/darts/generated_api/darts.timeseries.html#darts.timeseries.TimeSeries.from_times_and_values) is similar to `TimeSeries.from_values()` but also accepts a time index.
 
@@ -67,7 +67,7 @@ my_multivariate_series = concatenate([series1, series2, ...], axis=1)
 produces a multivariate series from some series that share the same time axis.
 
 ## Implementation
-Behind the scenes, `TimeSeries` is wrapping around a 3-dimensional `xarray.DataArray` object. The dimensions are *(time, component, sample)*, where the size of the *component* dimension is larger than 1 for multivariate series and the size of the *sample* dimension is larger than 1 for stochastic series. The `DataArray` is itself backed by a a 3-dimensional NumPy array, and it has a time index (either `pandas.DatetimeIndex` or `pandas.RangeIndex`) on the *time* dimension and another `pandas.Index` on the *component* (or "columns") dimension. `TimeSeries` is intended to be immutable.
+Behind the scenes, `TimeSeries` is wrapping around a 3-dimensional `xarray.DataArray` object. The dimensions are *(time, component, sample)*, where the size of the *component* dimension is larger than 1 for multivariate series and the size of the *sample* dimension is larger than 1 for stochastic series. The `DataArray` is itself backed by a 3-dimensional NumPy array, and it has a time index (either `pandas.DatetimeIndex` or `pandas.RangeIndex`) on the *time* dimension and another `pandas.Index` on the *component* (or "columns") dimension. `TimeSeries` is intended to be immutable and most operations return new `TimeSeries` objects.
 
 ## Exporting data from a `TimeSeries`
 `TimeSeries` objects offer a few ways to export the data, for example:
diff --git a/requirements/core.txt b/requirements/core.txt
@@ -12,7 +12,7 @@ prophet>=1.1
 requests>=2.22.0
 scikit-learn>=1.0.1
 scipy>=1.3.2
-statsforecast>=0.5.2
+statsforecast==0.6.0
 statsmodels>=0.13.0
 tbats>=1.1.0
 tqdm>=4.60.0