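Minor edits: rename A_scale_matrix_option to scale_matrix_cholesky_option, docstring tweaks, add tf-keras test requirement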

Onno Kampman · Onno Kampman · commit a3fe789d6961 · 2024-03-15T18:17:40.000+08:00
diff --git a/fcest/models/likelihoods.py b/fcest/models/likelihoods.py
@@ -28,7 +28,7 @@
 
 class WishartProcessLikelihoodBase(MonteCarloLikelihood):
     """
-    Class for Wishart process likelihoods.
+    Abstract class for all Wishart process likelihoods.
     """
 
     def __init__(
@@ -40,6 +40,7 @@ def __init__(
     ):
         """
         Initialize the base Wishart process likelihood.
+        This implementation assumes the input is uni-dimensional.
 
         Parameters
         ----------
@@ -49,6 +50,7 @@ def __init__(
             Degrees of freedom.
         :param num_mc_samples:
             Number of Monte Carlo samples used to approximate gradients (S).
+            Sometimes also denoted as R.
         """
         if num_factors is not None:
             latent_dim = num_factors * nu
@@ -76,7 +78,7 @@ def __init__(
         D: int,
         nu: int = None,
         num_mc_samples: int = 2,
-        A_scale_matrix_option: str = 'train_full_matrix',
+        scale_matrix_cholesky_option: str = 'train_full_matrix',
         train_additive_noise: bool = False,
         additive_noise_matrix_init: float = 0.01,
         verbose: bool = True,
@@ -92,7 +94,7 @@ def __init__(
             Degrees of freedom.
         :param num_mc_samples:
             Number of Monte Carlo samples used to approximate gradients (S).
-        :param A_scale_matrix_option:
+        :param scale_matrix_cholesky_option:
         :param train_additive_noise:
             Whether to train the additive noise matrix (Lambda).
         :param additive_noise_matrix_init:
@@ -109,7 +111,9 @@ def __init__(
             nu=nu,
             num_mc_samples=num_mc_samples,
         )
-        self.A_scale_matrix = self._set_A_scale_matrix(option=A_scale_matrix_option)  # (D, D)
+        self.A_scale_matrix = self._set_A_scale_matrix(
+            option=scale_matrix_cholesky_option
+        )  # (D, D)
 
         # The additive noise matrix must have positive diagonal values, which this softplus construction guarantees.
         additive_noise_matrix_init = np.log(
@@ -122,7 +126,7 @@ def __init__(
         )  # (D, )
 
         if verbose:
-            logging.info(f"A scale matrix option is '{A_scale_matrix_option:s}'.")
+            logging.info(f"Scale matrix Cholesky (matrix A) option is '{scale_matrix_cholesky_option:s}'.")
             print('A_scale_matrix: ', self.A_scale_matrix)
             print('initial additive part: ', self.additive_part)
 
@@ -207,13 +211,14 @@ def _log_prob(
         # compute the constant term of the log likelihood
         constant_term = - self.D / 2 * tf.math.log(2 * tf.constant(np.pi, dtype=tf.float64))
 
-        # compute the `log(det(AFFA))` component of the log likelihood
+        # compute the AFFA component of the log likelihood - our construction of \Sigma
         # TODO: this does not work for nu != D
         # af = tf.matmul(self.A_scale_matrix, f_sample)  # (S, N, D, nu)
         af = tf.multiply(self.A_scale_matrix, f_sample)
-
-        affa = tf.matmul(af, af, transpose_b=True)  # (S, N, D, D) - our construction of \Sigma
+        affa = tf.matmul(af, af, transpose_b=True)  # (S, N, D, D)
         affa = self._add_diagonal_additive_noise(affa)  # (S, N, D, D)
+
+        # compute the `log(det(AFFA))` component of the log likelihood
         # Before, the trainable additive noise sometimes broke the Cholesky decomposition.
         # This did not happen again after forcing it to be positive.
         # TODO: Can adding positive values to the diagonal ever make a PSD matrix become non-PSD?
@@ -224,7 +229,9 @@ def _log_prob(
             print(self.additive_part)
             print(e)
         log_det_affa = 2 * tf.math.reduce_sum(
-            tf.math.log(tf.linalg.diag_part(L)),
+            tf.math.log(
+                tf.linalg.diag_part(L)
+            ),
             axis=2
         )  # (S, N)
 
@@ -317,7 +324,7 @@ def __init__(
         nu: int = None,
         num_mc_samples: int = 2,
         num_factors: int = None,
-        A_scale_matrix_option: str = 'train_full_matrix',
+        scale_matrix_cholesky_option: str = 'train_full_matrix',
         train_additive_noise: bool = False,
         additive_noise_matrix_init: float = 0.01,
         verbose: bool = True,
@@ -330,3 +337,30 @@ def __init__(
         )
 
         raise NotImplementedError("Factorized Wishart process not implemented yet.")
+
+    def _log_prob(
+        self,
+        x_data: np.array,
+        f_sample: tf.Tensor,
+        y_data: np.array,
+    ) -> tf.Tensor:
+        """
+        Compute the (Monte Carlo estimate of the) log likelihood given samples of the GPs.
+
+        This overrides the method in MonteCarloLikelihood.
+
+        Parameters
+        ----------
+        :param x_data:
+            Input tensor.
+            NumPy array of shape (num_time_steps, 1) or (N, 1).
+        :param f_sample:
+            Function evaluation tensor.
+            (num_mc_samples, num_time_steps, num_factors, degrees_of_freedom) or (S, N, K, nu).
+        :param y_data:
+            Observation tensor.
+            (num_time_steps, num_time_series) or (N, D).
+        :return:
+            (num_time_steps, ) or (N, )
+        """
+        assert isinstance(f_sample, tf.Tensor)
diff --git a/fcest/models/wishart_process.py b/fcest/models/wishart_process.py
@@ -53,7 +53,7 @@ def __init__(
         nu: int = None,
         kernel: Kernel = None,
         num_mc_samples: int = 5,
-        A_scale_matrix_option: str = 'train_full_matrix',
+        scale_matrix_cholesky_option: str = 'train_full_matrix',
         train_additive_noise: bool = True,
         kernel_lengthscale_init: float = 0.3,
         q_sqrt_init: float = 0.001,
@@ -78,7 +78,7 @@ def __init__(
         :param num_mc_samples:
             The number of Monte Carlo samples used to approximate the ELBO.
             In the paper this is R, in the code sometimes S.
-        :param A_scale_matrix_option:
+        :param scale_matrix_cholesky_option:
             We found that training the full matrix yields the best results.
         :param train_additive_noise:
         :param kernel_lengthscale_init:
@@ -105,7 +105,7 @@ def __init__(
                 D=self.D,
                 nu=nu,
                 num_mc_samples=num_mc_samples,
-                A_scale_matrix_option=A_scale_matrix_option,
+                scale_matrix_cholesky_option=scale_matrix_cholesky_option,
                 train_additive_noise=train_additive_noise,
                 num_factors=num_factors,
             )
@@ -114,7 +114,7 @@ def __init__(
                 D=self.D,
                 nu=nu,
                 num_mc_samples=num_mc_samples,
-                A_scale_matrix_option=A_scale_matrix_option,
+                scale_matrix_cholesky_option=scale_matrix_cholesky_option,
                 train_additive_noise=train_additive_noise,
             )
         super().__init__(
@@ -338,7 +338,7 @@ def __init__(
         nu: int = None,
         kernel: Kernel = gpflow.kernels.Matern52(),
         num_mc_samples: int = 5,
-        A_scale_matrix_option: str = 'train_full_matrix',
+        scale_matrix_cholesky_option: str = 'train_full_matrix',
         train_additive_noise: bool = True,
         kernel_lengthscale_init: float = 0.3,
         q_sqrt_init: float = 0.001,
@@ -362,7 +362,7 @@ def __init__(
         :param kernel:
         :param num_mc_samples:
             Number of Monte Carlo samples taken to approximate the ELBO.
-        :param A_scale_matrix_option:
+        :param scale_matrix_cholesky_option:
         :param train_additive_noise:
         :param kernel_lengthscale_init:
         :param q_sqrt_init:
@@ -387,7 +387,7 @@ def __init__(
                 D=self.D,
                 nu=nu,
                 num_mc_samples=num_mc_samples,
-                A_scale_matrix_option=A_scale_matrix_option,
+                scale_matrix_cholesky_option=scale_matrix_cholesky_option,
                 train_additive_noise=train_additive_noise,
                 num_factors=num_factors,
                 verbose=verbose,
@@ -397,7 +397,7 @@ def __init__(
                 D=self.D,
                 nu=nu,
                 num_mc_samples=num_mc_samples,
-                A_scale_matrix_option=A_scale_matrix_option,
+                scale_matrix_cholesky_option=scale_matrix_cholesky_option,
                 train_additive_noise=train_additive_noise,
                 verbose=verbose,
             )
diff --git a/tests_requirements.txt b/tests_requirements.txt
@@ -12,3 +12,4 @@ scikit-learn
 scipy
 statsmodels
 tensorflow>=2.10
+tf-keras