Feat/stochastic inputs (#833)

hrzn · web-flow · commit ae47aba8d946 · 2022-03-08T13:58:23.000+01:00
* Use stochastic samples for training/inference of torch models

* Fix typo

* Add a unit test
diff --git a/darts/tests/models/forecasting/test_probabilistic_models.py b/darts/tests/models/forecasting/test_probabilistic_models.py
@@ -242,3 +242,24 @@ def _get_avgs(series):
                     "The difference between the mean forecast and the mean series is larger "
                     "than expected on component 1 for distribution {}".format(lkl),
                 )
+
+        def test_stochastic_inputs(self):
+            """Two forecasts from a stochastic input series must not be identical."""
+            model = RNNModel(input_chunk_length=5)
+            model.fit(self.constant_ts, epochs=2)
+
+            # build a stochastic series: 100 normal samples around the target
+            target_vals = self.constant_ts.values()
+            stochastic_vals = np.random.normal(
+                loc=target_vals, scale=1.0, size=(len(self.constant_ts), 100)
+            )
+            stochastic_vals = np.expand_dims(stochastic_vals, axis=1)
+            stochastic_series = TimeSeries.from_times_and_values(
+                self.constant_ts.time_index, stochastic_vals
+            )
+
+            # a deterministic model given stochastic inputs yields stochastic forecasts
+            preds = [model.predict(series=stochastic_series, n=10) for _ in range(2)]
+
+            # np.all instead of np.alltrue (removed in NumPy 2.0); forecasts must differ
+            self.assertFalse(np.all(preds[0].values() == preds[1].values()))
diff --git a/darts/timeseries.py b/darts/timeseries.py
@@ -1176,6 +1176,28 @@ def values(self, copy=True, sample=0) -> np.ndarray:
         else:
             return self._xa.values[:, :, sample]
 
+    def random_component_values(self, copy=True) -> np.ndarray:
+        """
+        Return a 2-D array of shape (time, component), containing the values for
+        one sample taken uniformly at random among this series' samples.
+
+        Parameters
+        ----------
+        copy
+            If True, return a copy of the values; otherwise return a view.
+            Leave it to True unless you know what you are doing.
+
+        Returns
+        -------
+        numpy.ndarray
+            The values of one sample; the sample index is drawn anew on every call.
+        """
+        sample = np.random.randint(low=0, high=self.n_samples)  # high is exclusive
+        if copy:
+            return np.copy(self._xa.values[:, :, sample])
+        else:
+            return self._xa.values[:, :, sample]
+
     def all_values(self, copy=True) -> np.ndarray:
         """
         Return a 3-D array of dimension (time, component, sample),
diff --git a/darts/utils/data/horizon_based_dataset.py b/darts/utils/data/horizon_based_dataset.py
@@ -109,7 +109,7 @@ def __getitem__(
         # determine the index of the time series.
         ts_idx = idx // self.nr_samples_per_ts
         ts_target = self.target_series[ts_idx]
-        target_vals = ts_target.values(copy=False)
+        target_vals = ts_target.random_component_values(copy=False)
 
         raise_if_not(
             len(target_vals)
@@ -168,7 +168,9 @@ def __getitem__(
                 f"({idx}-th sample)",
             )
 
-            covariate = ts_covariate.values(copy=False)[cov_start:cov_end]
+            covariate = ts_covariate.random_component_values(copy=False)[
+                cov_start:cov_end
+            ]
 
             raise_if_not(
                 len(covariate) == len(past_target),
diff --git a/darts/utils/data/inference_dataset.py b/darts/utils/data/inference_dataset.py
@@ -163,7 +163,9 @@ def __getitem__(
         )
 
         # extract past target values
-        past_target = target_series.values(copy=False)[-self.input_chunk_length :]
+        past_target = target_series.random_component_values(copy=False)[
+            -self.input_chunk_length :
+        ]
 
         # optionally, extract covariates
         cov_past, cov_future = None, None
@@ -181,7 +183,9 @@ def __getitem__(
             )
 
             # extract covariate values and split into a past (historic) and future part
-            covariate = covariate_series.values(copy=False)[cov_start:cov_end]
+            covariate = covariate_series.random_component_values(copy=False)[
+                cov_start:cov_end
+            ]
             if self.input_chunk_length != 0:  # regular models
                 cov_past, cov_future = (
                     covariate[: self.input_chunk_length],
diff --git a/darts/utils/data/shifted_dataset.py b/darts/utils/data/shifted_dataset.py
@@ -522,7 +522,7 @@ def __getitem__(self, idx) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray
         # determine the index of the time series.
         ts_idx = idx // self.max_samples_per_ts
         ts_target = self.target_series[ts_idx]
-        target_vals = ts_target.values(copy=False)
+        target_vals = ts_target.random_component_values(copy=False)
 
         # determine the actual number of possible samples in this time series
         n_samples_in_ts = len(target_vals) - self.size_of_both_chunks + 1
@@ -582,7 +582,9 @@ def __getitem__(self, idx) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray
                 f"that don't extend far enough into the future. ({idx}-th sample)",
             )
 
-            covariate = ts_covariate.values(copy=False)[cov_start:cov_end]
+            covariate = ts_covariate.random_component_values(copy=False)[
+                cov_start:cov_end
+            ]
 
             raise_if_not(
                 len(covariate)