 )


-class WishartProcessLikelihood(MonteCarloLikelihood):
+class WishartProcessLikelihoodBase(MonteCarloLikelihood):
+    """
+    Abstract class for all Wishart process likelihoods.
+    """
+
+    def __init__(
+        self,
+        D: int,
+        nu: int = None,
+        num_mc_samples: int = 2,
+        num_factors: int = None,
+    ):
+        """
+        Initialize the base Wishart process likelihood.
+        This implementation assumes the input is one-dimensional.
+
+        Parameters
+        ----------
+        :param D:
+            The number of time series.
+        :param nu:
+            Degrees of freedom.
+        :param num_mc_samples:
+            Number of Monte Carlo samples used to approximate gradients (S).
+            Sometimes also denoted as R.
+        :param num_factors:
+            Number of latent factors (K); if given, the latent dimension is K * nu instead of D * nu.
+        """
+        if num_factors is not None:
+            latent_dim = num_factors * nu
+        else:
+            latent_dim = D * nu
+        super().__init__(
+            input_dim=1,
+            latent_dim=latent_dim,
+            observation_dim=D,
+        )
+        self.D = D
+        self.nu = nu
+        self.num_mc_samples = num_mc_samples
+        self.num_factors = num_factors
+
+
+class WishartProcessLikelihood(WishartProcessLikelihoodBase):
     """
     Class for Wishart process likelihoods.
     It specifies an observation model connecting the latent functions ('F') to the data ('Y').
     """
 
     def __init__(
-        self,
-        D: int,
-        nu: int = None,
-        num_mc_samples: int = 2,
-        A_scale_matrix_option: str = 'train_full_matrix',
-        train_additive_noise: bool = False,
-        additive_noise_matrix_init: float = 0.01,
-        verbose: bool = True,
+        self,
+        D: int,
+        nu: int = None,
+        num_mc_samples: int = 2,
+        scale_matrix_cholesky_option: str = 'train_full_matrix',
+        train_additive_noise: bool = False,
+        additive_noise_matrix_init: float = 0.01,
+        verbose: bool = True,
     ) -> None:
         """
         Initialize the Wishart process likelihood.
 
         Parameters
         ----------
-        :param D:
+        :param D:
             The number of time series.
-        :param nu:
+        :param nu:
             Degrees of freedom.
         :param num_mc_samples:
             Number of Monte Carlo samples used to approximate gradients (S).
-        :param A_scale_matrix_option:
+        :param scale_matrix_cholesky_option:
+            How to construct and train A, the Cholesky factor of the scale matrix V
+            (e.g. 'train_full_matrix' or 'identity').
         :param train_additive_noise:
             Whether to train the additive noise matrix (Lambda).
         :param additive_noise_matrix_init:
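A minimal sketch of the latent-dimension bookkeeping that `WishartProcessLikelihoodBase.__init__` centralizes above (plain Python; the function name is illustrative and not part of the diff):

```python
# With D time series and nu degrees of freedom, the full model needs D * nu
# latent GPs; a factored model with K factors needs only K * nu of them.
def wishart_latent_dim(D: int, nu: int, num_factors: int = None) -> int:
    return (num_factors if num_factors is not None else D) * nu

assert wishart_latent_dim(D=5, nu=5) == 25                 # full model: D * nu
assert wishart_latent_dim(D=5, nu=2, num_factors=2) == 4   # factored model: K * nu
```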
@@ -66,14 +107,13 @@ def __init__(
         if nu < D:
             raise Exception("Wishart Degrees of Freedom must be >= D.")
         super().__init__(
-            input_dim=1,
-            latent_dim=D * nu,
-            observation_dim=D,
+            D=D,
+            nu=nu,
+            num_mc_samples=num_mc_samples,
         )
-        self.D = D
-        self.nu = nu
-        self.num_mc_samples = num_mc_samples
-        self.A_scale_matrix = self._set_A_scale_matrix(option=A_scale_matrix_option)  # (D, D)
+        self.A_scale_matrix = self._set_A_scale_matrix(
+            option=scale_matrix_cholesky_option
+        )  # (D, D)
 
         # The additive noise matrix must have positive diagonal values, which this softplus construction guarantees.
         additive_noise_matrix_init = np.log(
@@ -86,16 +126,16 @@ def __init__(
         )  # (D, )
 
         if verbose:
-            logging.info(f"A scale matrix option is '{A_scale_matrix_option:s}'.")
+            logging.info(f"Scale matrix Cholesky (matrix A) option is '{scale_matrix_cholesky_option:s}'.")
             print('A_scale_matrix: ', self.A_scale_matrix)
             print('initial additive part: ', self.additive_part)
 
     def variational_expectations(
-        self,
-        x_data: np.array,
-        f_mean: tf.Tensor,
-        f_variance: tf.Tensor,
-        y_data: np.array,
+        self,
+        x_data: np.array,
+        f_mean: tf.Tensor,
+        f_variance: tf.Tensor,
+        y_data: np.array,
     ) -> tf.Tensor:
         """
         This returns the expectation of the log likelihood part of the ELBO.
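A note on the softplus construction in `__init__` above: the initialization lines are elided by the diff, but the comment says the additive noise diagonal must stay positive. The following self-contained illustration shows the inverse-softplus trick that achieves this; variable names are illustrative, not the repo's API.

```python
import numpy as np
import tensorflow as tf

# Pick the raw (unconstrained) value so that softplus(raw) == target > 0.
target = 0.01
raw = np.log(np.exp(target) - 1.0)  # inverse softplus

additive_part = tf.Variable(raw * np.ones(4), dtype=tf.float64)  # trainable, unconstrained
noise_diag = tf.math.softplus(additive_part)                     # strictly positive, shape (D,)
np.testing.assert_allclose(noise_diag.numpy(), target, rtol=1e-6)
```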
@@ -171,13 +211,14 @@ def _log_prob(
         # compute the constant term of the log likelihood
         constant_term = -self.D / 2 * tf.math.log(2 * tf.constant(np.pi, dtype=tf.float64))
 
-        # compute the `log(det(AFFA))` component of the log likelihood
+        # compute the AFFA component of the log likelihood - our construction of \Sigma
         # TODO: this does not work for nu != D
         # af = tf.matmul(self.A_scale_matrix, f_sample)  # (S, N, D, nu)
         af = tf.multiply(self.A_scale_matrix, f_sample)
-
-        affa = tf.matmul(af, af, transpose_b=True)  # (S, N, D, D) - our construction of \Sigma
+        affa = tf.matmul(af, af, transpose_b=True)  # (S, N, D, D)
         affa = self._add_diagonal_additive_noise(affa)  # (S, N, D, D)
+
+        # compute the `log(det(AFFA))` component of the log likelihood
         # Before, the trainable additive noise sometimes broke the Cholesky decomposition.
         # This did not happen again after forcing it to be positive.
         # TODO: Can adding positive values to the diagonal ever make a PSD matrix become non-PSD?
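A self-contained sketch of the Sigma = (A * F)(A * F)^T + noise construction above and the Cholesky-based log-determinant used just below; the sizes, the identity A, and the fixed noise level are illustrative, and nu == D as the TODO requires.

```python
import numpy as np
import tensorflow as tf

S, N, D, nu = 2, 3, 4, 4
A = tf.eye(D, dtype=tf.float64)                        # stand-in for self.A_scale_matrix
F = tf.random.normal((S, N, D, nu), dtype=tf.float64)  # stand-in for the GP samples

AF = tf.multiply(A, F)                                 # elementwise, as in the code above
Sigma = tf.matmul(AF, AF, transpose_b=True)            # (S, N, D, D), PSD by construction
Sigma += 1e-2 * tf.eye(D, dtype=tf.float64)            # diagonal noise keeps it positive definite

L = tf.linalg.cholesky(Sigma)                          # (S, N, D, D)
log_det = 2.0 * tf.reduce_sum(tf.math.log(tf.linalg.diag_part(L)), axis=2)  # (S, N)

# Sanity check against the direct computation: det(Sigma) = det(L)^2.
np.testing.assert_allclose(log_det.numpy(), tf.linalg.logdet(Sigma).numpy(), rtol=1e-6)
```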
@@ -188,7 +229,9 @@ def _log_prob(
             print(self.additive_part)
             print(e)
         log_det_affa = 2 * tf.math.reduce_sum(
-            tf.math.log(tf.linalg.diag_part(L)),
+            tf.math.log(
+                tf.linalg.diag_part(L)
+            ),
             axis=2
         )  # (S, N)
 
@@ -205,7 +248,10 @@ def _log_prob(
         log_likel_p = tf.math.reduce_mean(log_likel_p, axis=0)  # mean over Monte Carlo samples, (N, )
         return log_likel_p
 
-    def _set_A_scale_matrix(self, option: str = 'identity') -> tf.Tensor:
+    def _set_A_scale_matrix(
+        self,
+        option: str = 'identity',
+    ) -> tf.Tensor:
         """
         A (the Cholesky factor of scale matrix V) represents the mean of estimates.
 
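A minimal sketch of the option dispatch this method performs, assuming only the two options visible in this diff ('identity' as the method default, 'train_full_matrix' as the constructor default); the real method is a class member and may support more options.

```python
import tensorflow as tf

def make_A_scale_matrix(option: str, D: int):
    if option == 'identity':
        return tf.eye(D, dtype=tf.float64)               # fixed identity Cholesky factor
    if option == 'train_full_matrix':
        return tf.Variable(tf.eye(D, dtype=tf.float64))  # trainable, initialized at identity
    raise NotImplementedError(f"Option '{option:s}' for 'A_scale_matrix' not recognized.")
```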
@@ -250,7 +296,10 @@ def _set_A_scale_matrix(self, option: str = 'identity') -> tf.Tensor:
             raise NotImplementedError(f"Option '{option:s}' for 'A_scale_matrix' not recognized.")
         return A_scale_matrix
 
-    def _add_diagonal_additive_noise(self, cov_matrix):
+    def _add_diagonal_additive_noise(
+        self,
+        cov_matrix,
+    ):
         """
         Add some noise, either fixed or trained.
 
@@ -264,7 +313,54 @@ def _add_diagonal_additive_noise(self, cov_matrix):
         )
 
 
-class FactoredWishartProcessLikelihood(MonteCarloLikelihood):
+class FactoredWishartProcessLikelihood(WishartProcessLikelihoodBase):
+    """
+    Class for Factored Wishart process likelihoods.
+    """
+
+    def __init__(
+        self,
+        D: int,
+        nu: int = None,
+        num_mc_samples: int = 2,
+        num_factors: int = None,
+        scale_matrix_cholesky_option: str = 'train_full_matrix',
+        train_additive_noise: bool = False,
+        additive_noise_matrix_init: float = 0.01,
+        verbose: bool = True,
+    ):
+        nu = num_factors if nu is None else nu
+        super().__init__(
+            D=D,
+            nu=nu,
+            num_mc_samples=num_mc_samples,
+            num_factors=num_factors,  # forwarded so the base class sets latent_dim = K * nu
+        )
 
-    def __init__(self):
         raise NotImplementedError("Factorized Wishart process not implemented yet.")
+
+    def _log_prob(
+        self,
+        x_data: np.array,
+        f_sample: tf.Tensor,
+        y_data: np.array,
+    ) -> tf.Tensor:
+        """
+        Compute the (Monte Carlo estimate of) the log likelihood given samples of the GPs.
+
+        This overrides the method in MonteCarloLikelihood.
+
+        Parameters
+        ----------
+        :param x_data:
+            Input tensor.
+            NumPy array of shape (num_time_steps, 1) or (N, 1).
+        :param f_sample:
+            Function evaluation tensor of shape
+            (num_mc_samples, num_time_steps, num_factors, degrees_of_freedom) or (S, N, K, nu).
+        :param y_data:
+            Observation tensor of shape
+            (num_time_steps, num_time_series) or (N, D).
+        :return:
+            (num_time_steps, ) or (N, )
+        """
+        assert isinstance(f_sample, tf.Tensor)
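A hypothetical usage sketch based only on the constructor signatures in this diff; it assumes the module's GPflow dependencies are installed. The factored variant still raises NotImplementedError, so only the full likelihood is constructed here.

```python
likelihood = WishartProcessLikelihood(
    D=5,
    nu=5,                 # must satisfy nu >= D
    num_mc_samples=2,
    scale_matrix_cholesky_option='train_full_matrix',
    train_additive_noise=True,
    additive_noise_matrix_init=0.01,
)
print(likelihood.D, likelihood.nu, likelihood.num_mc_samples)  # 5 5 2
```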