Skip to content

Commit a3fe789

Browse files
author
Onno Kampman
committed
minor edits
1 parent dcd6f83 commit a3fe789

File tree

3 files changed

+53
-18
lines changed

3 files changed

+53
-18
lines changed

fcest/models/likelihoods.py

+44-10
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
class WishartProcessLikelihoodBase(MonteCarloLikelihood):
3030
"""
31-
Class for Wishart process likelihoods.
31+
Abstract class for all Wishart process likelihoods.
3232
"""
3333

3434
def __init__(
@@ -40,6 +40,7 @@ def __init__(
4040
):
4141
"""
4242
Initialize the base Wishart process likelihood.
43+
This implementation assumes the input is uni-dimensional.
4344
4445
Parameters
4546
----------
@@ -49,6 +50,7 @@ def __init__(
4950
Degrees of freedom.
5051
:param num_mc_samples:
5152
Number of Monte Carlo samples used to approximate gradients (S).
53+
Sometimes also denoted as R.
5254
"""
5355
if num_factors is not None:
5456
latent_dim = num_factors * nu
@@ -76,7 +78,7 @@ def __init__(
7678
D: int,
7779
nu: int = None,
7880
num_mc_samples: int = 2,
79-
A_scale_matrix_option: str = 'train_full_matrix',
81+
scale_matrix_cholesky_option: str = 'train_full_matrix',
8082
train_additive_noise: bool = False,
8183
additive_noise_matrix_init: float = 0.01,
8284
verbose: bool = True,
@@ -92,7 +94,7 @@ def __init__(
9294
Degrees of freedom.
9395
:param num_mc_samples:
9496
Number of Monte Carlo samples used to approximate gradients (S).
95-
:param A_scale_matrix_option:
97+
:param scale_matrix_cholesky_option:
9698
:param train_additive_noise:
9799
Whether to train the additive noise matrix (Lambda).
98100
:param additive_noise_matrix_init:
@@ -109,7 +111,9 @@ def __init__(
109111
nu=nu,
110112
num_mc_samples=num_mc_samples,
111113
)
112-
self.A_scale_matrix = self._set_A_scale_matrix(option=A_scale_matrix_option) # (D, D)
114+
self.A_scale_matrix = self._set_A_scale_matrix(
115+
option=scale_matrix_cholesky_option
116+
) # (D, D)
113117

114118
# The additive noise matrix must have positive diagonal values, which this softplus construction guarantees.
115119
additive_noise_matrix_init = np.log(
@@ -122,7 +126,7 @@ def __init__(
122126
) # (D, )
123127

124128
if verbose:
125-
logging.info(f"A scale matrix option is '{A_scale_matrix_option:s}'.")
129+
logging.info(f"Scale matrix Cholesky (matrix A) option is '{scale_matrix_cholesky_option:s}'.")
126130
print('A_scale_matrix: ', self.A_scale_matrix)
127131
print('initial additive part: ', self.additive_part)
128132

@@ -207,13 +211,14 @@ def _log_prob(
207211
# compute the constant term of the log likelihood
208212
constant_term = - self.D / 2 * tf.math.log(2 * tf.constant(np.pi, dtype=tf.float64))
209213

210-
# compute the `log(det(AFFA))` component of the log likelihood
214+
# compute the AFFA component of the log likelihood - our construction of \Sigma
211215
# TODO: this does not work for nu != D
212216
# af = tf.matmul(self.A_scale_matrix, f_sample) # (S, N, D, nu)
213217
af = tf.multiply(self.A_scale_matrix, f_sample)
214-
215-
affa = tf.matmul(af, af, transpose_b=True) # (S, N, D, D) - our construction of \Sigma
218+
affa = tf.matmul(af, af, transpose_b=True) # (S, N, D, D)
216219
affa = self._add_diagonal_additive_noise(affa) # (S, N, D, D)
220+
221+
# compute the `log(det(AFFA))` component of the log likelihood
217222
# Before, the trainable additive noise sometimes broke the Cholesky decomposition.
218223
# This did not happen again after forcing it to be positive.
219224
# TODO: Can adding positive values to the diagonal ever make a PSD matrix become non-PSD?
@@ -224,7 +229,9 @@ def _log_prob(
224229
print(self.additive_part)
225230
print(e)
226231
log_det_affa = 2 * tf.math.reduce_sum(
227-
tf.math.log(tf.linalg.diag_part(L)),
232+
tf.math.log(
233+
tf.linalg.diag_part(L)
234+
),
228235
axis=2
229236
) # (S, N)
230237

@@ -317,7 +324,7 @@ def __init__(
317324
nu: int = None,
318325
num_mc_samples: int = 2,
319326
num_factors: int = None,
320-
A_scale_matrix_option: str = 'train_full_matrix',
327+
scale_matrix_cholesky_option: str = 'train_full_matrix',
321328
train_additive_noise: bool = False,
322329
additive_noise_matrix_init: float = 0.01,
323330
verbose: bool = True,
@@ -330,3 +337,30 @@ def __init__(
330337
)
331338

332339
raise NotImplementedError("Factorized Wishart process not implemented yet.")
340+
341+
def _log_prob(
342+
self,
343+
x_data: np.array,
344+
f_sample: tf.Tensor,
345+
y_data: np.array,
346+
) -> tf.Tensor:
347+
"""
348+
Compute the (Monte Carlo estimate of the) log likelihood given samples of the GPs.
349+
350+
This overrides the method in MonteCarloLikelihood.
351+
352+
Parameters
353+
----------
354+
:param x_data:
355+
Input tensor.
356+
NumPy array of shape (num_time_steps, 1) or (N, 1).
357+
:param f_sample:
358+
Function evaluation tensor.
359+
Tensor of shape (num_mc_samples, num_time_steps, num_factors, degrees_of_freedom) or (S, N, K, nu).
360+
:param y_data:
361+
Observation tensor.
362+
NumPy array of shape (num_time_steps, num_time_series) or (N, D).
363+
:return:
364+
(num_time_steps, ) or (N, )
365+
"""
366+
assert isinstance(f_sample, tf.Tensor)

fcest/models/wishart_process.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ def __init__(
5353
nu: int = None,
5454
kernel: Kernel = None,
5555
num_mc_samples: int = 5,
56-
A_scale_matrix_option: str = 'train_full_matrix',
56+
scale_matrix_cholesky_option: str = 'train_full_matrix',
5757
train_additive_noise: bool = True,
5858
kernel_lengthscale_init: float = 0.3,
5959
q_sqrt_init: float = 0.001,
@@ -78,7 +78,7 @@ def __init__(
7878
:param num_mc_samples:
7979
The number of Monte Carlo samples used to approximate the ELBO.
8080
In the paper this is R, in the code sometimes S.
81-
:param A_scale_matrix_option:
81+
:param scale_matrix_cholesky_option:
8282
We found that training the full matrix yields the best results.
8383
:param train_additive_noise:
8484
:param kernel_lengthscale_init:
@@ -105,7 +105,7 @@ def __init__(
105105
D=self.D,
106106
nu=nu,
107107
num_mc_samples=num_mc_samples,
108-
A_scale_matrix_option=A_scale_matrix_option,
108+
scale_matrix_cholesky_option=scale_matrix_cholesky_option,
109109
train_additive_noise=train_additive_noise,
110110
num_factors=num_factors,
111111
)
@@ -114,7 +114,7 @@ def __init__(
114114
D=self.D,
115115
nu=nu,
116116
num_mc_samples=num_mc_samples,
117-
A_scale_matrix_option=A_scale_matrix_option,
117+
scale_matrix_cholesky_option=scale_matrix_cholesky_option,
118118
train_additive_noise=train_additive_noise,
119119
)
120120
super().__init__(
@@ -338,7 +338,7 @@ def __init__(
338338
nu: int = None,
339339
kernel: Kernel = gpflow.kernels.Matern52(),
340340
num_mc_samples: int = 5,
341-
A_scale_matrix_option: str = 'train_full_matrix',
341+
scale_matrix_cholesky_option: str = 'train_full_matrix',
342342
train_additive_noise: bool = True,
343343
kernel_lengthscale_init: float = 0.3,
344344
q_sqrt_init: float = 0.001,
@@ -362,7 +362,7 @@ def __init__(
362362
:param kernel:
363363
:param num_mc_samples:
364364
Number of Monte Carlo samples taken to approximate the ELBO.
365-
:param A_scale_matrix_option:
365+
:param scale_matrix_cholesky_option:
366366
:param train_additive_noise:
367367
:param kernel_lengthscale_init:
368368
:param q_sqrt_init:
@@ -387,7 +387,7 @@ def __init__(
387387
D=self.D,
388388
nu=nu,
389389
num_mc_samples=num_mc_samples,
390-
A_scale_matrix_option=A_scale_matrix_option,
390+
scale_matrix_cholesky_option=scale_matrix_cholesky_option,
391391
train_additive_noise=train_additive_noise,
392392
num_factors=num_factors,
393393
verbose=verbose,
@@ -397,7 +397,7 @@ def __init__(
397397
D=self.D,
398398
nu=nu,
399399
num_mc_samples=num_mc_samples,
400-
A_scale_matrix_option=A_scale_matrix_option,
400+
scale_matrix_cholesky_option=scale_matrix_cholesky_option,
401401
train_additive_noise=train_additive_noise,
402402
verbose=verbose,
403403
)

tests_requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ scikit-learn
1212
scipy
1313
statsmodels
1414
tensorflow>=2.10
15+
tf-keras

0 commit comments

Comments
 (0)