diff --git a/tensorflow_probability/python/distributions/BUILD b/tensorflow_probability/python/distributions/BUILD
index d3052fd533..e25cc35a3f 100644
--- a/tensorflow_probability/python/distributions/BUILD
+++ b/tensorflow_probability/python/distributions/BUILD
@@ -1705,7 +1705,7 @@ multi_substrate_py_test(
     srcs = [
         "cholesky_lkj_test.py",
    ],
-    jax_size = "large",
+    jax_tags = ["notap"],
    deps = [
        # absl/testing:parameterized dep,
        # numpy dep,
diff --git a/tensorflow_probability/python/distributions/cholesky_lkj.py b/tensorflow_probability/python/distributions/cholesky_lkj.py
index f6f457681b..3823745ed1 100644
--- a/tensorflow_probability/python/distributions/cholesky_lkj.py
+++ b/tensorflow_probability/python/distributions/cholesky_lkj.py
@@ -46,6 +46,16 @@ class CholeskyLKJ(distribution.Distribution):
 
   In other words, if `X ~ CholeskyLKJ(c)`, then `X @ X^T ~ LKJ(c)`.
 
+  The distribution is supported on lower triangular N x N matrices which are
+  Cholesky factors of correlation matrices; equivalently, matrices whose rows
+  have unit Euclidean norm and whose diagonal entries are positive. The
+  probability density function is given by:
+
+  pdf(X; c) = (1/Z(c)) prod_i X_ii^{n-i+2c-3} (0 <= i < n)
+
+  where there are n(n-1)/2 independent variables X_ij (0 <= i < j < n) and
+  Z(c) is the normalizing constant, the same as that of LKJ(c).
+
   For more details on the LKJ distribution, see `tfp.distributions.LKJ`.
 
   #### Examples
@@ -169,11 +179,9 @@ def _log_prob(self, x):
       # This log_prob comes from using a change of variables via the Cholesky
       # decomposition on the LKJ's log_prob.
       # The first term represents the change of variables of the LKJ's
-      # unnormalized log_prob, the second is the normalization term coming
-      # from the LKJ distribution, and the final is a normalization term
-      # coming from the change of variables.
-      return (self._log_unnorm_prob(x, concentration) -
-              normalizer + self.dimension * np.log(2.))
+      # unnormalized log_prob and the second is the normalization term coming
+      # from the LKJ distribution.
+      return self._log_unnorm_prob(x, concentration) - normalizer
 
   def _log_unnorm_prob(self, x, concentration, name=None):
     """Returns the unnormalized log density of a CholeskyLKJ distribution.
@@ -195,12 +203,26 @@ def _log_unnorm_prob(self, x, concentration, name=None):
       x = tf.convert_to_tensor(x, name='x')
       logdiag = tf.math.log(tf.linalg.diag_part(x))
       # We pick up a weighted sum of the log(diag) due to the jacobian
-      # of the cholesky decomposition. See `tfp.bijectors.CholeskyOuterProduct`
-      # for details.
+      # of the cholesky decomposition. By an argument similar to that of
+      # `tfp.bijectors.CholeskyOuterProduct`, the jacobian is given by:
+      # prod_i x_ii^{n-i-1} (0 <= i < n).
+      #
+      # To see this, observe that if x @ x^T = p, then p_ij depends only on
+      # those x_kl where k <= i and l <= j. Therefore, on vectorizing the
+      # strictly lower triangular parts of x and p, we get that the jacobian
+      # matrix [d/dvec(x) vec(p)] is lower triangular. The jacobian
+      # determinant is then the product of the n(n-1)/2 diagonal entries:
+      # J = prod_ij d/dx_ij p_ij (0 <= j < i < n)
+      #   = prod_ij d/dx_ij (x_i0 * x_j0 + x_i1 * x_j1 + ... + x_ij * x_jj)
+      #   = prod_ij x_jj
+      #   = prod_i x_ii^{n-i-1}
+      #
+      # For more details, see `tfp.bijectors.CholeskyOuterProduct`.
       dimension_range = np.linspace(
+          self.dimension - 1,
+          0.,
           self.dimension,
-          1., self.dimension,
-          dtype=dtype_util.as_numpy_dtype(
-              concentration.dtype))
+          dtype=dtype_util.as_numpy_dtype(concentration.dtype))
       return tf.reduce_sum(
           (2. * concentration[..., tf.newaxis] - 2. + dimension_range) *
           logdiag, axis=-1)
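As a quick numerical sanity check of the jacobian argument in the comment above, the following is a minimal NumPy sketch (not part of the patch; the helper names are made up for illustration). It builds a Cholesky factor x of a correlation matrix from its n(n-1)/2 strictly lower triangular entries, takes a finite-difference jacobian of the map to the strictly lower triangular part of p = x @ x^T, and compares the determinant against prod_i x_ii^{n-i-1}:

import numpy as np

def chol_from_strict_lower(z, n):
  # Rows of a Cholesky factor of a correlation matrix have unit norm, so
  # each diagonal entry is determined by the strictly lower entries of
  # its row.
  x = np.zeros((n, n))
  x[np.tril_indices(n, -1)] = z
  for i in range(n):
    x[i, i] = np.sqrt(1. - np.dot(x[i, :i], x[i, :i]))
  return x

def strict_lower_of_outer(z, n):
  x = chol_from_strict_lower(z, n)
  return (x @ x.T)[np.tril_indices(n, -1)]

n = 4
rng = np.random.RandomState(0)
z = rng.uniform(-0.3, 0.3, size=n * (n - 1) // 2)

# Finite-difference jacobian of vec(strict_lower(p)) w.r.t. z.
eps = 1e-6
base = strict_lower_of_outer(z, n)
jac = np.empty((z.size, z.size))
for k in range(z.size):
  dz = z.copy()
  dz[k] += eps
  jac[:, k] = (strict_lower_of_outer(dz, n) - base) / eps

x = chol_from_strict_lower(z, n)
expected = np.prod(np.diag(x) ** (n - 1 - np.arange(n)))
print(np.linalg.det(jac), expected)  # Should agree to roughly 1e-5.

The unit-norm row constraint is what makes each diagonal entry a function of the strictly lower entries of its row, so the map has exactly n(n-1)/2 inputs and outputs and the determinant comparison is well posed.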
diff --git a/tensorflow_probability/python/distributions/cholesky_lkj_test.py b/tensorflow_probability/python/distributions/cholesky_lkj_test.py
index 18959565d4..70cd683a8d 100644
--- a/tensorflow_probability/python/distributions/cholesky_lkj_test.py
+++ b/tensorflow_probability/python/distributions/cholesky_lkj_test.py
@@ -36,21 +36,108 @@ class CholeskyLKJTest(test_util.TestCase):
 
   def testLogProbMatchesTransformedDistribution(self, dtype):
-    dtype = np.float64
-    for dims in (3, 4, 5):
+
+    # In this test, we start with a distribution supported on N x N SPD
+    # matrices which factorizes as an LKJ-supported correlation matrix and a
+    # diagonal of exponential random variables. The total number of
+    # independent parameters is N(N-1)/2 + N = N(N+1)/2. Using the
+    # CholeskyOuterProduct bijector (which requires N(N+1)/2 independent
+    # parameters), we map it to the space of lower triangular Cholesky
+    # factors. We show that the resulting distribution factorizes as the
+    # product of CholeskyLKJ and Rayleigh distributions.
+
+    # Given a sample from the 'LKJ + Exponential' distribution on SPD
+    # matrices and the corresponding log prob, returns the transformed sample
+    # and log prob in Cholesky space, factored into a diagonal matrix and a
+    # Cholesky factor of a correlation matrix.
+    def _get_transformed_sample_and_log_prob(lkj_exp_sample, lkj_exp_log_prob,
+                                             dimension):
+      # We change variables to the space of SPD matrices parameterized by the
+      # lower triangular entries (including the diagonal) of \Sigma.
+      # The transformation is given by \Sigma = \sqrt{S} P \sqrt{S}.
+      #
+      # The jacobian matrix J of the forward transform has the block form
+      # [[I, 0]; [*, D]]; where I is the N x N identity matrix; and D is an
+      # N(N - 1)/2 square diagonal matrix and * need not be computed.
+      # Here, N is the dimension. D_ij (0<=i
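Independently of the transformed-distribution test above, the docstring density can be spot-checked through the public API, since the normalizer Z(c) cancels in a log-prob difference: for any two Cholesky factors x1, x2 of the same dimension, log_prob(x1) - log_prob(x2) should equal sum_i (n - i + 2c - 3) * (log x1_ii - log x2_ii). A minimal sketch, assuming TF2 eager mode and a TFP build that includes CholeskyLKJ (the dimension, concentration, and seed are arbitrary):

import numpy as np
import tensorflow_probability as tfp

tfd = tfp.distributions

n, c = 4, 1.5
dist = tfd.CholeskyLKJ(dimension=n, concentration=c)
x1, x2 = dist.sample(2, seed=42)

# Exponents from the docstring pdf: n - i + 2c - 3 for 0 <= i < n.
exponents = n - np.arange(n) + 2. * c - 3.
lhs = (dist.log_prob(x1) - dist.log_prob(x2)).numpy()
rhs = np.sum(exponents * (np.log(np.diag(x1.numpy())) -
                          np.log(np.diag(x2.numpy()))))
print(lhs, rhs)  # Should match to float tolerance.

With the patched linspace bounds, the exponent applied to log x_ii is 2c - 2 + (n - 1 - i) = n - i + 2c - 3, which is exactly the docstring exponent; the removed `self.dimension * np.log(2.)` term was the leftover CholeskyOuterProduct normalization that this difference check would have flagged.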