diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index ccde13925bc..f3035fe825e 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -45,9 +45,16 @@ All of the above apply to: - Changes to the BART implementation: - A BART variable can be combined with other random variables. The `inv_link` argument has been removed (see [4914](https://github.com/pymc-devs/pymc3/pull/4914)). - Moved BART to its own module (see [5058](https://github.com/pymc-devs/pymc3/pull/5058)). +- Changes to the Gaussian Process (GP) submodule (see [5055](https://github.com/pymc-devs/pymc/pull/5055)): + - For all implementations, `gp.Latent`, `gp.Marginal`, etc., `cov_func` and `mean_func` must now be passed as keyword arguments. + - In the Windows test conda environment, the `mkl` version is fixed to 2020.4 and `mkl-service` is fixed to `2.3.0`. This was required for `gp.MarginalKron` to function properly. + - `gp.TP`, the Student's T process, now uses rotated samples from `pm.StudentT` directly, instead of sampling from `pm.Chi2` and then from `pm.Normal`. + - The "jitter" parameter (the diagonal noise term added to Gram matrices so that the Cholesky factorization is numerically stable) is now exposed to the user instead of being hard-coded. See the function `gp.util.stabilize`. + - The `is_observed` argument for `gp.Marginal*` implementations has been deprecated. + - In `gp.util`, the `kmeans_inducing_points` function now passes `kmeans_kwargs` through to scipy's k-means function. + - The function `replace_with_values` has been added to `gp.util`. - ... - ### Expected breaks + New API was already available in `v3`. + Old API had deprecation warnings since at least `3.11.0` (2021-01). diff --git a/conda-envs/windows-environment-test-py38.yml b/conda-envs/windows-environment-test-py38.yml index 5a1f3629c2e..79f0d8c599b 100644 --- a/conda-envs/windows-environment-test-py38.yml +++ b/conda-envs/windows-environment-test-py38.yml @@ -12,7 +12,8 @@ dependencies: - fastprogress>=0.2.0 - h5py>=2.7 - libpython -- mkl-service +- mkl==2020.4 +- mkl-service==2.3.0 - m2w64-toolchain - numpy>=1.15.0 - pandas diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index ccbbe59b81e..1a19edb2bbf 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -64,7 +64,7 @@ def __init__(self, input_dim, active_dims=None): if active_dims is None: self.active_dims = np.arange(input_dim) else: - self.active_dims = np.asarray(active_dims, np.int) + self.active_dims = np.asarray(active_dims, int) def __call__(self, X, Xs=None, diag=False): r""" @@ -152,7 +152,9 @@ def __array_wrap__(self, result): elif isinstance(result[0][0], Prod): return result[0][0].factor_list[0] * A else: - raise RuntimeError + raise TypeError( + f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`." + ) class Combination(Covariance): diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 0ed4d719f9d..b8a76519598 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -24,9 +24,11 @@ from pymc.gp.cov import Constant, Covariance from pymc.gp.mean import Zero from pymc.gp.util import ( + JITTER_DEFAULT, cholesky, conditioned_vars, - infer_shape, + infer_size, + replace_with_values, solve_lower, solve_upper, stabilize, @@ -41,7 +43,7 @@ class Base: Base class.
""" - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0)): + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): self.mean_func = mean_func self.cov_func = cov_func @@ -51,7 +53,7 @@ def __add__(self, other): raise TypeError("Cannot add different GP types") mean_total = self.mean_func + other.mean_func cov_total = self.cov_func + other.cov_func - return self.__class__(mean_total, cov_total) + return self.__class__(mean_func=mean_total, cov_func=cov_total) def prior(self, name, X, *args, **kwargs): raise NotImplementedError @@ -62,7 +64,7 @@ def marginal_likelihood(self, name, X, *args, **kwargs): def conditional(self, name, Xnew, *args, **kwargs): raise NotImplementedError - def predict(self, Xnew, point=None, given=None, diag=False): + def predict(self, Xnew, point=None, given=None, diag=False, model=None): raise NotImplementedError @@ -122,21 +124,21 @@ class Latent(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0)): - super().__init__(mean_func, cov_func) + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): + super().__init__(mean_func=mean_func, cov_func=cov_func) - def _build_prior(self, name, X, reparameterize=True, **kwargs): + def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): mu = self.mean_func(X) - cov = stabilize(self.cov_func(X)) - shape = infer_shape(X, kwargs.pop("shape", None)) + cov = stabilize(self.cov_func(X), jitter) if reparameterize: - v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=shape, **kwargs) + size = infer_size(X, kwargs.pop("size", None)) + v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=size, **kwargs) f = pm.Deterministic(name, mu + cholesky(cov).dot(v)) else: - f = pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + f = pm.MvNormal(name, mu=mu, cov=cov, **kwargs) return f - def prior(self, name, X, reparameterize=True, **kwargs): + def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the GP prior distribution evaluated over the input locations `X`. @@ -157,11 +159,14 @@ def prior(self, name, X, reparameterize=True, **kwargs): reparameterize: bool Reparameterize the distribution by rotating the random variable by the Cholesky factor of the covariance matrix. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to distribution constructor. 
""" - f = self._build_prior(name, X, reparameterize, **kwargs) + f = self._build_prior(name, X, reparameterize, jitter, **kwargs) self.X = X self.f = f return f @@ -181,10 +186,10 @@ def _get_given_vals(self, given): X, f = self.X, self.f return X, f, cov_total, mean_total - def _build_conditional(self, Xnew, X, f, cov_total, mean_total): + def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) - L = cholesky(stabilize(Kxx)) + L = cholesky(stabilize(Kxx, jitter)) A = solve_lower(L, Kxs) v = solve_lower(L, f - mean_total(X)) mu = self.mean_func(Xnew) + at.dot(at.transpose(A), v) @@ -192,7 +197,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total): cov = Kss - at.dot(at.transpose(A), A) return mu, cov - def conditional(self, name, Xnew, given=None, **kwargs): + def conditional(self, name, Xnew, given=None, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -217,14 +222,17 @@ def conditional(self, name, Xnew, given=None, **kwargs): Can optionally take as key value pairs: `X`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, *givens) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + mu, cov = self._build_conditional(Xnew, *givens, jitter) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @conditioned_vars(["X", "f", "nu"]) @@ -257,28 +265,27 @@ class TP(Latent): Processes as Alternatives to Gaussian Processes. arXiv preprint arXiv:1402.4306. 
""" - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), nu=None): + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0), nu=None): if nu is None: raise ValueError("Student's T process requires a degrees of freedom parameter, 'nu'") self.nu = nu - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Student's T processes aren't additive") - def _build_prior(self, name, X, reparameterize=True, **kwargs): + def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): mu = self.mean_func(X) - cov = stabilize(self.cov_func(X)) - shape = infer_shape(X, kwargs.pop("shape", None)) + cov = stabilize(self.cov_func(X), jitter) if reparameterize: - chi2 = pm.ChiSquared(name + "_chi2_", self.nu) - v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=shape, **kwargs) - f = pm.Deterministic(name, (at.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))) + size = infer_size(X, kwargs.pop("size", None)) + v = pm.StudentT(name + "_rotated_", mu=0.0, sigma=1.0, nu=self.nu, size=size, **kwargs) + f = pm.Deterministic(name, mu + cholesky(cov).dot(v)) else: - f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, size=shape, **kwargs) + f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, **kwargs) return f - def prior(self, name, X, reparameterize=True, **kwargs): + def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the TP prior distribution evaluated over the input locations `X`. @@ -299,16 +306,16 @@ def prior(self, name, X, reparameterize=True, **kwargs): Extra keyword arguments that are passed to distribution constructor. """ - f = self._build_prior(name, X, reparameterize, **kwargs) + f = self._build_prior(name, X, reparameterize, jitter, **kwargs) self.X = X self.f = f return f - def _build_conditional(self, Xnew, X, f): + def _build_conditional(self, Xnew, X, f, jitter): Kxx = self.cov_func(X) Kxs = self.cov_func(X, Xnew) Kss = self.cov_func(Xnew) - L = cholesky(stabilize(Kxx)) + L = cholesky(stabilize(Kxx, jitter)) A = solve_lower(L, Kxs) cov = Kss - at.dot(at.transpose(A), A) v = solve_lower(L, f - self.mean_func(X)) @@ -318,7 +325,7 @@ def _build_conditional(self, Xnew, X, f): covT = (self.nu + beta - 2) / (nu2 - 2) * cov return nu2, mu, covT - def conditional(self, name, Xnew, **kwargs): + def conditional(self, name, Xnew, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -333,6 +340,10 @@ def conditional(self, name, Xnew, **kwargs): Name of the random variable Xnew: array-like Function input values. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
@@ -340,9 +351,8 @@ def conditional(self, name, Xnew, **kwargs): X = self.X f = self.f - nu2, mu, cov = self._build_conditional(Xnew, X, f) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, size=shape, **kwargs) + nu2, mu, cov = self._build_conditional(Xnew, X, f, jitter) + return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, **kwargs) @conditioned_vars(["X", "y", "noise"]) @@ -391,14 +401,14 @@ class Marginal(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def _build_marginal_likelihood(self, X, noise): + def _build_marginal_likelihood(self, X, noise, jitter): mu = self.mean_func(X) Kxx = self.cov_func(X) Knx = noise(X) cov = Kxx + Knx - return mu, cov + return mu, stabilize(cov, jitter) - def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): + def marginal_likelihood(self, name, X, y, noise, jitter=0.0, is_observed=True, **kwargs): R""" Returns the marginal likelihood distribution, given the input locations `X` and the data `y`. @@ -422,9 +432,9 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): noise: scalar, Variable, or Covariance Standard deviation of the Gaussian noise. Can also be a Covariance for non-white noise. - is_observed: bool - Whether to set `y` as an `observed` variable in the `model`. - Default is `True`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -432,15 +442,19 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): if not isinstance(noise, Covariance): noise = pm.gp.cov.WhiteNoise(noise) - mu, cov = self._build_marginal_likelihood(X, noise) + mu, cov = self._build_marginal_likelihood(X, noise, jitter) self.X = X self.y = y self.noise = noise if is_observed: return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs) else: - shape = infer_shape(X, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + warnings.warn( + "The 'is_observed' argument has been deprecated. If the GP is " + "unobserved use gp.Latent instead.", + FutureWarning, + ) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def _get_given_vals(self, given): if given is None: @@ -460,12 +474,14 @@ def _get_given_vals(self, given): X, y, noise = self.X, self.y, self.noise return X, y, noise, cov_total, mean_total - def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total): + def _build_conditional( + self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total, jitter + ): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) Knx = noise(X) rxx = y - mean_total(X) - L = cholesky(stabilize(Kxx) + Knx) + L = cholesky(stabilize(Kxx, jitter) + Knx) A = solve_lower(L, Kxs) v = solve_lower(L, rxx) mu = self.mean_func(Xnew) + at.dot(at.transpose(A), v) @@ -480,9 +496,9 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mea cov = Kss - at.dot(at.transpose(A), A) if pred_noise: cov += noise(Xnew) - return mu, cov if pred_noise else stabilize(cov) + return mu, cov if pred_noise else stabilize(cov, jitter) - def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. 
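As an aside for readers of this patch, here is a minimal usage sketch of the reworked `TP` and `Marginal` APIs exercised in the hunks above. It is illustrative only and not part of the diff; the data, lengthscales, `nu`, noise level, and the `find_MAP` step are placeholder choices.

import numpy as np
import pymc as pm

X = np.random.randn(50, 3)    # placeholder training inputs
y = np.random.randn(50)       # placeholder observations
Xnew = np.random.randn(20, 3) # placeholder prediction inputs

# Student's T process: keyword-only constructor, prior built from a rotated pm.StudentT
with pm.Model():
    tp = pm.gp.TP(cov_func=pm.gp.cov.ExpQuad(3, ls=0.5), nu=5)
    f = tp.prior("f", X=X)
    # jitter for conditionals defaults to 0.0 and can be overridden
    fcond = tp.conditional("fcond", Xnew=Xnew, jitter=1e-6)

# Marginal GP: keyword-only constructor, exposed jitter, refactored predict
with pm.Model() as model:
    gp = pm.gp.Marginal(cov_func=pm.gp.cov.ExpQuad(3, ls=[0.1, 0.2, 0.3]))
    # is_observed is deprecated; unobserved GPs should use gp.Latent instead
    y_ = gp.marginal_likelihood("y", X=X, y=y, noise=0.1, jitter=1e-6)
    pcond = gp.conditional("pcond", Xnew=Xnew)

mp = pm.find_MAP(model=model)
mu, cov = gp.predict(Xnew, point=mp, model=model)

The `predict` call resolves the symbolic mean and covariance through the new `replace_with_values` helper rather than the removed `draw_values` path.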
@@ -510,17 +526,22 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): Can optionally take as key value pairs: `X`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): + def predict( + self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=0.0, model=None + ): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -541,16 +562,17 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): Default is `False`. given: dict Same as `conditional` method. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. """ if given is None: given = {} + mu, cov = self._predict_at(Xnew, diag, pred_noise, given, jitter) + return replace_with_values([mu, cov], replacements=point, model=model) - mu, cov = self.predictt(Xnew, diag, pred_noise, given) - # XXX: This needs to be refactored - # return draw_values([mu, cov], point=point) - return None, None - - def predictt(self, Xnew, diag=False, pred_noise=False, given=None): + def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=0.0): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -570,7 +592,7 @@ def predictt(self, Xnew, diag=False, pred_noise=False, given=None): Same as `conditional` method. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, diag, *givens) + mu, cov = self._build_conditional(Xnew, pred_noise, diag, *givens, jitter) return mu, cov @@ -643,7 +665,7 @@ def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), approx="FITC"): if approx not in self._available_approx: raise NotImplementedError(approx) self.approx = approx - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): # new_gp will default to FITC approx @@ -657,11 +679,11 @@ def __add__(self, other): # Use y as first argument, so that we can use functools.partial # in marginal_likelihood instead of lambda. This makes pickling # possible. 
- def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): + def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter): sigma2 = at.square(sigma) Kuu = self.cov_func(Xu) Kuf = self.cov_func(Xu, X) - Luu = cholesky(stabilize(Kuu)) + Luu = cholesky(stabilize(Kuu, jitter)) A = solve_lower(Luu, Kuf) Qffd = at.sum(A * A, 0) if self.approx == "FITC": @@ -686,7 +708,9 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) - def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kwargs): + def marginal_likelihood( + self, name, X, Xu, y, noise=None, is_observed=True, jitter=0.0, **kwargs + ): R""" Returns the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise @@ -709,6 +733,9 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw is_observed: bool Whether to set `y` as an `observed` variable in the `model`. Default is `True`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -717,24 +744,19 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw self.X = X self.Xu = Xu self.y = y + if noise is None: - sigma = kwargs.get("sigma") - if sigma is None: - raise ValueError("noise argument must be specified") - else: - self.sigma = sigma - warnings.warn( - "The 'sigma' argument has been deprecated. Use 'noise' instead.", - FutureWarning, - ) + raise ValueError("noise argument must be specified") else: self.sigma = noise + if is_observed: return pm.DensityDist( name, X, Xu, self.sigma, + jitter, logp=self._build_marginal_likelihood_logp, observed=y, ndims_params=[2, 2, 0], @@ -742,11 +764,17 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw **kwargs, ) else: + warnings.warn( + "The 'is_observed' argument has been deprecated. 
If the GP is " + "unobserved use gp.Latent instead.", + FutureWarning, + ) return pm.DensityDist( name, X, Xu, self.sigma, + jitter, logp=self._build_marginal_likelihood_logp, observed=y, ndims_params=[2, 2, 0], @@ -755,11 +783,13 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw **kwargs, ) - def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total): + def _build_conditional( + self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total, jitter + ): sigma2 = at.square(sigma) Kuu = cov_total(Xu) Kuf = cov_total(Xu, X) - Luu = cholesky(stabilize(Kuu)) + Luu = cholesky(stabilize(Kuu, jitter)) A = solve_lower(Luu, Kuf) Qffd = at.sum(A * A, 0) if self.approx == "FITC": @@ -786,7 +816,7 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, cov = self.cov_func(Xnew) - at.dot(at.transpose(As), As) + at.dot(at.transpose(C), C) if pred_noise: cov += sigma2 * at.identity_like(cov) - return mu, cov if pred_noise else stabilize(cov) + return mu, cov if pred_noise else stabilize(cov, jitter) def _get_given_vals(self, given): if given is None: @@ -803,7 +833,7 @@ def _get_given_vals(self, given): X, Xu, y, sigma = self.X, self.Xu, self.y, self.sigma return X, Xu, y, sigma, cov_total, mean_total - def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kwargs): R""" Returns the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. @@ -822,15 +852,18 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): Can optionally take as key value pairs: `X`, `Xu`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
""" givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @conditioned_vars(["Xs", "f"]) @@ -887,27 +920,26 @@ class LatentKron(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): + def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): try: self.cov_funcs = list(cov_funcs) except TypeError: self.cov_funcs = [cov_funcs] cov_func = pm.gp.cov.Kron(self.cov_funcs) - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") - def _build_prior(self, name, Xs, **kwargs): - self.N = np.prod([len(X) for X in Xs]) + def _build_prior(self, name, Xs, jitter, **kwargs): + self.N = int(np.prod([len(X) for X in Xs])) mu = self.mean_func(cartesian(*Xs)) - chols = [cholesky(stabilize(cov(X))) for cov, X in zip(self.cov_funcs, Xs)] - # remove reparameterization option + chols = [cholesky(stabilize(cov(X), jitter)) for cov, X in zip(self.cov_funcs, Xs)] v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=self.N, **kwargs) f = pm.Deterministic(name, mu + at.flatten(kron_dot(chols, v))) return f - def prior(self, name, Xs, **kwargs): + def prior(self, name, Xs, jitter=JITTER_DEFAULT, **kwargs): """ Returns the prior distribution evaluated over the input locations `Xs`. @@ -921,22 +953,26 @@ def prior(self, name, Xs, **kwargs): must be passable to its respective covariance without error. The total covariance function is measured on the full grid `cartesian(*Xs)`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to the `KroneckerNormal` distribution constructor. """ if len(Xs) != len(self.cov_funcs): raise ValueError("Must provide a covariance function for each X") - f = self._build_prior(name, Xs, **kwargs) + + f = self._build_prior(name, Xs, jitter, **kwargs) self.Xs = Xs self.f = f return f - def _build_conditional(self, Xnew): + def _build_conditional(self, Xnew, jitter): Xs, f = self.Xs, self.f X = cartesian(*Xs) delta = f - self.mean_func(X) - covs = [stabilize(cov(Xi)) for cov, Xi in zip(self.cov_funcs, Xs)] + covs = [stabilize(cov(Xi), jitter) for cov, Xi in zip(self.cov_funcs, Xs)] chols = [cholesky(cov) for cov in covs] cholTs = [at.transpose(chol) for chol in chols] Kss = self.cov_func(Xnew) @@ -946,10 +982,10 @@ def _build_conditional(self, Xnew): alpha = kron_solve_upper(cholTs, alpha) mu = at.dot(Ksx, alpha).ravel() + self.mean_func(Xnew) A = kron_solve_lower(chols, Kxs) - cov = stabilize(Kss - at.dot(at.transpose(A), A)) + cov = stabilize(Kss - at.dot(at.transpose(A), A), jitter) return mu, cov - def conditional(self, name, Xnew, **kwargs): + def conditional(self, name, Xnew, jitter=0.0, **kwargs): """ Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -976,13 +1012,16 @@ def conditional(self, name, Xnew, **kwargs): Xnew: array-like Function input values. If one-dimensional, must be a column vector with shape `(n, 1)`. 
+ jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ - mu, cov = self._build_conditional(Xnew) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + mu, cov = self._build_conditional(Xnew, jitter) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @conditioned_vars(["Xs", "y", "sigma"]) @@ -1043,13 +1082,13 @@ class MarginalKron(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): + def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): try: self.cov_funcs = list(cov_funcs) except TypeError: self.cov_funcs = [cov_funcs] cov_func = pm.gp.cov.Kron(self.cov_funcs) - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") @@ -1061,14 +1100,12 @@ def _build_marginal_likelihood(self, Xs): return mu, covs def _check_inputs(self, Xs, y): - N = np.prod([len(X) for X in Xs]) + N = int(np.prod([len(X) for X in Xs])) if len(Xs) != len(self.cov_funcs): raise ValueError("Must provide a covariance function for each X") if N != len(y): raise ValueError( - ( - "Length of y ({}) must match length of cartesian" "cartesian product of Xs ({})" - ).format(len(y), N) + f"Length of y ({len(y)}) must match length of cartesian product of Xs ({N})" ) def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): @@ -1105,10 +1142,15 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): if is_observed: return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, observed=y, **kwargs) else: - shape = np.prod([len(X) for X in Xs]) - return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=shape, **kwargs) + warnings.warn( + "The 'is_observed' argument has been deprecated. If the GP is " + "unobserved use gp.LatentKron instead.", + FutureWarning, + ) + size = int(np.prod([len(X) for X in Xs])) + return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) - def _build_conditional(self, Xnew, pred_noise, diag): + def _build_conditional(self, Xnew, diag, pred_noise): Xs, y, sigma = self.Xs, self.y, self.sigma # Old points @@ -1121,7 +1163,6 @@ def _build_conditional(self, Xnew, pred_noise, diag): if sigma is not None: eigs += sigma ** 2 - # New points Km = self.cov_func(Xnew, diag=diag) Knm = self.cov_func(X, Xnew) Kmn = Knm.T @@ -1147,7 +1188,7 @@ def _build_conditional(self, Xnew, pred_noise, diag): cov += sigma * at.identity_like(cov) return mu, cov - def conditional(self, name, Xnew, pred_noise=False, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs): """ Returns the conditional distribution evaluated over new input locations `Xnew`, just as in `Marginal`. @@ -1181,11 +1222,10 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
""" - mu, cov = self._build_conditional(Xnew, pred_noise, False) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + mu, cov = self._build_conditional(Xnew, diag, pred_noise) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False): + def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -1205,12 +1245,10 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False): Whether or not observation noise is included in the conditional. Default is `False`. """ - mu, cov = self._build_conditional(Xnew, pred_noise, diag) - # XXX: This needs to be refactored - # return draw_values([mu, cov], point=point) - return None, None + mu, cov = self._predict_at(Xnew, diag, pred_noise) + return replace_with_values([mu, cov], replacements=point, model=model) - def predictt(self, Xnew, diag=False, pred_noise=False): + def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -1227,5 +1265,5 @@ def predictt(self, Xnew, diag=False, pred_noise=False): Whether or not observation noise is included in the conditional. Default is `False`. """ - mu, cov = self._build_conditional(Xnew, pred_noise, diag) + mu, cov = self._build_conditional(Xnew, diag, pred_noise) return mu, cov diff --git a/pymc/gp/util.py b/pymc/gp/util.py index f47331248c1..730e664eec4 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -17,6 +17,7 @@ import aesara.tensor as at import numpy as np +from aesara.compile import SharedVariable from aesara.tensor.slinalg import ( # noqa: W0611; pylint: disable=unused-import cholesky, solve, @@ -30,10 +31,66 @@ from aesara.tensor.var import TensorConstant from scipy.cluster.vq import kmeans +from pymc.aesaraf import compile_rv_inplace, walk_model -def infer_shape(X, n_points=None): +# Avoid circular dependency when importing modelcontext +from pymc.distributions.distribution import NoDistribution + +assert NoDistribution # keep both pylint and black happy +from pymc.model import modelcontext + +JITTER_DEFAULT = 1e-6 + + +def replace_with_values(vars_needed, replacements=None, model=None): R""" - Maybe attempt to infer the shape of a Gaussian process input matrix. + Replace random variable nodes in the graph with values given by the replacements dict. + Uses untransformed versions of the inputs, performs some basic input validation. + + Parameters + ---------- + vars_needed: list of TensorVariables + A list of variable outputs + replacements: dict with string keys, numeric values + The variable name and values to be replaced in the model graph. 
+ model: Model + A PyMC model object + """ + model = modelcontext(model) + + inputs, input_names = [], [] + for rv in walk_model(vars_needed, walk_past_rvs=True): + if rv in model.named_vars.values() and not isinstance(rv, SharedVariable): + inputs.append(rv) + input_names.append(rv.name) + + # Then it's deterministic, no inputs are required, can eval and return + if len(inputs) == 0: + return tuple(v.eval() for v in vars_needed) + + fn = compile_rv_inplace( + inputs, + vars_needed, + allow_input_downcast=True, + accept_inplace=True, + on_unused_input="ignore", + ) + + # Remove unneeded inputs + replacements = {name: val for name, val in replacements.items() if name in input_names} + missing = set(input_names) - set(replacements.keys()) + + # Error if more inputs are needed + if len(missing) > 0: + missing_str = ", ".join(missing) + raise ValueError(f"Values for {missing_str} must be included in `replacements`.") + + return fn(**replacements) + + +def infer_size(X, n_points=None): + R""" + Maybe attempt to infer the size, or N, of a Gaussian process input matrix. If a specific shape cannot be inferred, for instance if X is symbolic, then an error is raised. @@ -48,31 +105,31 @@ def infer_shape(X, n_points=None): """ if n_points is None: try: - n_points = np.int(X.shape[0]) + n_points = int(X.shape[0]) except TypeError: raise TypeError("Cannot infer 'shape', provide as an argument") return n_points -def stabilize(K, c=1e-6): +def stabilize(K, jitter=JITTER_DEFAULT): R""" Adds small diagonal to a covariance matrix. Often the matrices calculated from covariance functions, `K = cov_func(X)` do not appear numerically to be positive semi-definite. Adding a small - correction, `c`, to the diagonal is usually enough to fix this. + correction, `jitter`, to the diagonal is usually enough to fix this. Parameters ---------- K: array-like A square covariance or kernel matrix. - c: float + jitter: float A small constant. """ - return K + c * at.identity_like(K) + return K + jitter * at.identity_like(K) -def kmeans_inducing_points(n_inducing, X): +def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): R""" Use the K-means algorithm to initialize the locations `X` for the inducing points `fu`. @@ -83,6 +140,8 @@ def kmeans_inducing_points(n_inducing, X): The number of inducing points (or k, the number of clusters) X: array-like Gaussian process input matrix. 
+ **kmeans_kwargs: + Extra keyword arguments that are passed to `scipy.cluster.vq.kmeans` """ # first whiten X if isinstance(X, TensorConstant): @@ -100,7 +159,11 @@ def kmeans_inducing_points(n_inducing, X): # if std of a column is very small (zero), don't normalize that column scaling[scaling <= 1e-6] = 1.0 Xw = X / scaling - Xu, distortion = kmeans(Xw, n_inducing) + + if "k_or_guess" in kmeans_kwargs: + warn.UserWarning("Use `n_inducing` to set the `k_or_guess` parameter instead.") + + Xu, distortion = kmeans(Xw, k_or_guess=n_inducing, **kmeans_kwargs) return Xu * scaling diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 424e87e5e8b..7ae9b23c866 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -34,7 +34,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: zero_mean = pm.gp.mean.Zero() - M = aesara.function([], zero_mean(X))() + M = zero_mean(X).eval() assert np.all(M == 0) assert M.shape == (10,) @@ -44,7 +44,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: const_mean = pm.gp.mean.Constant(6) - M = aesara.function([], const_mean(X))() + M = const_mean(X).eval() assert np.all(M == 6) assert M.shape == (10,) @@ -54,7 +54,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: linear_mean = pm.gp.mean.Linear(2, 0.5) - M = aesara.function([], linear_mean(X))() + M = linear_mean(X).eval() npt.assert_allclose(M[1], 0.7222, atol=1e-3) assert M.shape == (10,) @@ -66,7 +66,7 @@ def test_add(self): mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5) mean2 = pm.gp.mean.Constant(2) mean = mean1 + mean2 + mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 0.7222 + 2 + 2, atol=1e-3) def test_prod(self): @@ -75,7 +75,7 @@ def test_prod(self): mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5) mean2 = pm.gp.mean.Constant(2) mean = mean1 * mean2 * mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 0.7222 * 2 * 2, atol=1e-3) def test_add_multid(self): @@ -86,7 +86,7 @@ def test_add_multid(self): mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b) mean2 = pm.gp.mean.Constant(2) mean = mean1 + mean2 + mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 10.8965 + 2 + 2, atol=1e-3) def test_prod_multid(self): @@ -97,7 +97,7 @@ def test_prod_multid(self): mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b) mean2 = pm.gp.mean.Constant(2) mean = mean1 * mean2 * mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 10.8965 * 2 * 2, atol=1e-3) @@ -108,10 +108,10 @@ def test_symadd_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 + cov2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightadd_scalar(self): @@ -119,10 +119,10 @@ def test_rightadd_scalar(self): with pm.Model() as model: a = 1 cov = pm.gp.cov.ExpQuad(1, 0.1) + a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftadd_scalar(self): @@ -130,10 +130,10 @@ def test_leftadd_scalar(self): with pm.Model() as model: a = 1 cov 
= a + pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightadd_matrix(self): @@ -141,10 +141,10 @@ def test_rightadd_matrix(self): M = 2 * np.ones((10, 10)) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) + M - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftadd_matrixt(self): @@ -152,10 +152,10 @@ def test_leftadd_matrixt(self): M = 2 * at.ones((10, 10)) with pm.Model() as model: cov = M + pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_matrix(self): @@ -164,8 +164,8 @@ def test_leftprod_matrix(self): with pm.Model() as model: cov = M + pm.gp.cov.ExpQuad(1, 0.1) cov_true = pm.gp.cov.ExpQuad(1, 0.1) + M - K = aesara.function([], cov(X))() - K_true = aesara.function([], cov_true(X))() + K = cov(X).eval() + K_true = cov_true(X).eval() assert np.allclose(K, K_true) def test_inv_rightadd(self): @@ -181,10 +181,10 @@ def test_symprod_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 * cov2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightprod_scalar(self): @@ -192,10 +192,10 @@ def test_rightprod_scalar(self): with pm.Model() as model: a = 2 cov = pm.gp.cov.ExpQuad(1, 0.1) * a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_scalar(self): @@ -203,10 +203,10 @@ def test_leftprod_scalar(self): with pm.Model() as model: a = 2 cov = a * pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightprod_matrix(self): @@ -214,10 +214,10 @@ def test_rightprod_matrix(self): M = 2 * np.ones((10, 10)) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) * M - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_matrix(self): @@ -226,8 +226,8 @@ def test_leftprod_matrix(self): with pm.Model() as model: cov = M * pm.gp.cov.ExpQuad(1, 0.1) cov_true = pm.gp.cov.ExpQuad(1, 0.1) * M - K = aesara.function([], cov(X))() - K_true = aesara.function([], cov_true(X))() + K = cov(X).eval() + K_true = cov_true(X).eval() assert np.allclose(K, K_true) def test_multiops(self): @@ -244,12 +244,12 @@ def 
test_multiops(self): + pm.gp.cov.ExpQuad(1, 0.1) + 3 ) - K1 = aesara.function([], cov1(X))() - K2 = aesara.function([], cov2(X))() + K1 = cov1(X).eval() + K2 = cov2(X).eval() assert np.allclose(K1, K2) # check diagonal - K1d = aesara.function([], cov1(X, diag=True))() - K2d = aesara.function([], cov2(X, diag=True))() + K1d = cov1(X, diag=True).eval() + K2d = cov2(X, diag=True).eval() npt.assert_allclose(np.diag(K1), K2d, atol=1e-5) npt.assert_allclose(np.diag(K2), K1d, atol=1e-5) @@ -265,10 +265,10 @@ def test_symexp_cov(self): with pm.Model() as model: cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 ** 2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_numpy(self): @@ -276,10 +276,10 @@ def test_covexp_numpy(self): with pm.Model() as model: a = np.array([[2]]) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_aesara(self): @@ -287,10 +287,10 @@ def test_covexp_aesara(self): with pm.Model() as model: a = at.alloc(2.0, 1, 1) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_shared(self): @@ -298,10 +298,10 @@ def test_covexp_shared(self): with pm.Model() as model: a = aesara.shared(2.0) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_invalid_covexp(self): @@ -321,11 +321,11 @@ def test_symprod_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1 * 0.53940, atol=1e-3) npt.assert_allclose(K[0, 11], 0.53940 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_multiops(self): @@ -342,8 +342,8 @@ def test_multiops(self): ) cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K_true = kronecker(aesara.function([], cov1(X1))(), aesara.function([], cov2(X2))()).eval() - K = aesara.function([], cov(X))() + K_true = kronecker(cov1(X1).eval(), cov2(X2).eval()).eval() + K = cov(X).eval() npt.assert_allclose(K_true, K) @@ -352,30 +352,30 @@ def test_slice1(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, 0.1, active_dims=[0, 0, 1]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.20084298, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_slice2(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = 
pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1, 2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_slice3(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1, 2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_diffslice(self): @@ -384,10 +384,10 @@ def test_diffslice(self): cov = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 0, 0]) + pm.gp.cov.ExpQuad( 3, ls=[0.1, 0.2, 0.3] ) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.683572, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -402,7 +402,7 @@ def test_stable(self): X = np.random.uniform(low=320.0, high=400.0, size=[2000, 2]) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.1) - dists = aesara.function([], cov.square_dist(X, X))() + dists = cov.square_dist(X, X).eval() assert not np.any(dists < 0) @@ -411,44 +411,44 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_2d(self): X = np.linspace(0, 1, 10).reshape(5, 2) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.820754, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_2dard(self): X = np.linspace(0, 1, 10).reshape(5, 2) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, np.array([1, 2])) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.969607, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_inv_lengthscale(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, ls_inv=10) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -457,14 +457,14 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.WhiteNoise(sigma=0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) npt.assert_allclose(K[0, 0], 0.5 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = 
cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) # check predict - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) # white noise predicting should return all zeros npt.assert_allclose(K[0, 0], 0.0, atol=1e-3) @@ -475,14 +475,14 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Constant(2.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.5, atol=1e-3) npt.assert_allclose(K[0, 0], 2.5, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 2.5, atol=1e-3) npt.assert_allclose(K[0, 0], 2.5, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -491,12 +491,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.RatQuad(1, ls=0.1, alpha=0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -505,12 +505,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Exponential(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -519,12 +519,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern52(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -533,12 +533,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern32(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -547,11 +547,11 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern12(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3) - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -560,12 +560,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Cosine(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 
0.766, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.766, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -574,12 +574,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Periodic(1, 0.1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -588,12 +588,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Linear(1, 0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -602,12 +602,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Polynomial(1, 0.5, 2, 0) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -621,12 +621,12 @@ def warp_func(x, a, b, c): with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) cov = pm.gp.cov.WarpedInput(1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -646,12 +646,12 @@ def tanh_func(x, x1, x2, w, x0): with pm.Model() as model: cov = pm.gp.cov.Gibbs(1, tanh_func, args=(0.05, 0.6, 0.4, 1.0)) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -673,12 +673,12 @@ def scaling_func(x, a, b): with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) cov = pm.gp.cov.ScaledCov(1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -689,6 +689,34 @@ def test_raises(self): pm.gp.cov.ScaledCov(1, "str is not Covariance 
object", lambda x: x) +class TestCircular: + def test_1d_tau1(self): + X = np.linspace(0, 1, 10)[:, None] + etalon = 0.600881 + with pm.Model(): + cov = pm.gp.cov.Circular(1, 1, tau=5) + K = cov(X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + K = cov(X, X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + # check diagonal + Kd = cov(X, diag=True).eval() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + + def test_1d_tau2(self): + X = np.linspace(0, 1, 10)[:, None] + etalon = 0.691239 + with pm.Model(): + cov = pm.gp.cov.Circular(1, 1, tau=4) + K = cov(X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + K = cov(X, X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + # check diagonal + Kd = cov(X, diag=True).eval() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + + class TestHandleArgs: def test_handleargs(self): def func_noargs(x): @@ -767,22 +795,21 @@ def test_raises3(self): B = pm.gp.cov.Coregion(1) -@pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestMarginalVsLatent: R""" Compare the logp of models Marginal, noise=0 and Latent. """ def setup_method(self): - X = np.random.randn(50, 3) - y = np.random.randn(50) * 0.01 - Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60) * 0.01 + X = np.random.randn(20, 3) + y = np.random.randn(20) + Xnew = np.random.randn(30, 3) + pnew = np.random.randn(30) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Marginal(mean_func, cov_func) - f = gp.marginal_likelihood("f", X, y, noise=0.0, is_observed=False, observed=y) + gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func) + f = gp.marginal_likelihood("f", X, y, noise=0.0) p = gp.conditional("p", Xnew) self.logp = model.logp({"p": pnew}) self.X = X @@ -794,7 +821,7 @@ def testLatent1(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Latent(mean_func, cov_func) + gp = pm.gp.Latent(mean_func=mean_func, cov_func=cov_func) f = gp.prior("f", self.X, reparameterize=False) p = gp.conditional("p", self.Xnew) latent_logp = model.logp({"f": self.y, "p": self.pnew}) @@ -804,7 +831,7 @@ def testLatent2(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Latent(mean_func, cov_func) + gp = pm.gp.Latent(mean_func=mean_func, cov_func=cov_func) f = gp.prior("f", self.X, reparameterize=True) p = gp.conditional("p", self.Xnew) chol = np.linalg.cholesky(cov_func(self.X).eval()) @@ -813,7 +840,6 @@ def testLatent2(self): npt.assert_allclose(latent_logp, self.logp, atol=5) -@pytest.mark.xfail(reason="The `gp.predict` method was not yet refactored") class TestMarginalVsMarginalSparse: R""" Compare logp of models Marginal and MarginalSparse. 
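Before the sparse-approximation tests below, a short sketch of how the `MarginalSparse` pieces touched by this patch fit together, including the `kmeans_kwargs` pass-through to scipy's k-means. This is illustrative only, with placeholder data and arbitrary `iter`, lengthscale, and noise values.

import numpy as np
import pymc as pm

X = np.random.randn(100, 3)  # placeholder inputs
y = np.random.randn(100)     # placeholder observations

# extra keyword arguments (here `iter`) are forwarded to scipy.cluster.vq.kmeans
Xu = pm.gp.util.kmeans_inducing_points(10, X, iter=30)

with pm.Model():
    gp = pm.gp.MarginalSparse(cov_func=pm.gp.cov.ExpQuad(3, ls=0.5), approx="FITC")
    # noise is now required; jitter stabilizes the Kuu Cholesky factorization
    y_ = gp.marginal_likelihood("y", X=X, Xu=Xu, y=y, noise=0.1, jitter=1e-6)

Note that `k_or_guess` itself should not be passed through `kmeans_kwargs`; the patch reserves it for `n_inducing`.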
@@ -822,13 +848,13 @@ class TestMarginalVsMarginalSparse: def setup_method(self): X = np.random.randn(50, 3) - y = np.random.randn(50) * 0.01 + y = np.random.randn(50) Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60) * 0.01 + pnew = np.random.randn(60) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Marginal(mean_func, cov_func) + gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func) sigma = 0.1 f = gp.marginal_likelihood("f", X, y, noise=sigma) p = gp.conditional("p", Xnew) @@ -845,7 +871,7 @@ def testApproximations(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx=approx) + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) p = gp.conditional("p", self.Xnew) approx_logp = model.logp({"f": self.y, "p": self.pnew}) @@ -856,10 +882,10 @@ def testPredictVar(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx=approx) + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) - mu1, var1 = self.gp.predict(self.Xnew, diag=True) - mu2, var2 = gp.predict(self.Xnew, diag=True) + mu1, var1 = self.gp.predict(self.Xnew, diag=True) + mu2, var2 = gp.predict(self.Xnew, diag=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) npt.assert_allclose(var1, var2, atol=0, rtol=1e-3) @@ -867,10 +893,10 @@ def testPredictCov(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx="DTC") - f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) - mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) - mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx="DTC") + f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) + mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) + mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) npt.assert_allclose(cov1, cov2, atol=0, rtol=1e-3) @@ -878,7 +904,7 @@ def testPredictCov(self): class TestGPAdditive: def setup_method(self): self.X = np.random.randn(50, 3) - self.y = np.random.randn(50) * 0.01 + self.y = np.random.randn(50) self.Xnew = np.random.randn(60, 3) self.noise = pm.gp.cov.WhiteNoise(0.1) self.covs = ( @@ -888,19 +914,20 @@ def setup_method(self): ) self.means = (pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5)) - @pytest.mark.xfail(reason="MvNormal was not yet refactored") def testAdditiveMarginal(self): with pm.Model() as model1: - gp1 = pm.gp.Marginal(self.means[0], self.covs[0]) - gp2 = pm.gp.Marginal(self.means[1], self.covs[1]) - gp3 = pm.gp.Marginal(self.means[2], self.covs[2]) + gp1 = pm.gp.Marginal(mean_func=self.means[0], cov_func=self.covs[0]) + gp2 = pm.gp.Marginal(mean_func=self.means[1], cov_func=self.covs[1]) + gp3 = pm.gp.Marginal(mean_func=self.means[2], cov_func=self.covs[2]) gpsum = gp1 + gp2 + gp3 fsum = gpsum.marginal_likelihood("f", self.X, self.y, noise=self.noise) model1_logp = 
model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.Marginal(reduce(add, self.means), reduce(add, self.covs)) + gptot = pm.gp.Marginal( + mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs) + ) fsum = gptot.marginal_likelihood("f", self.X, self.y, noise=self.noise) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) @@ -913,17 +940,24 @@ def testAdditiveMarginal(self): fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2) + logp1 = model1.logp({"fp1": fp}) + logp2 = model2.logp({"fp2": fp}) + npt.assert_allclose(logp1, logp2, atol=0, rtol=1e-2) - @pytest.mark.xfail(reason="MarginalSparse must be refactored to not use DensityDist") @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"]) def testAdditiveMarginalSparse(self, approx): Xu = np.random.randn(10, 3) sigma = 0.1 with pm.Model() as model1: - gp1 = pm.gp.MarginalSparse(self.means[0], self.covs[0], approx=approx) - gp2 = pm.gp.MarginalSparse(self.means[1], self.covs[1], approx=approx) - gp3 = pm.gp.MarginalSparse(self.means[2], self.covs[2], approx=approx) + gp1 = pm.gp.MarginalSparse( + mean_func=self.means[0], cov_func=self.covs[0], approx=approx + ) + gp2 = pm.gp.MarginalSparse( + mean_func=self.means[1], cov_func=self.covs[1], approx=approx + ) + gp3 = pm.gp.MarginalSparse( + mean_func=self.means[2], cov_func=self.covs[2], approx=approx + ) gpsum = gp1 + gp2 + gp3 fsum = gpsum.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) @@ -931,7 +965,7 @@ def testAdditiveMarginalSparse(self, approx): with pm.Model() as model2: gptot = pm.gp.MarginalSparse( - reduce(add, self.means), reduce(add, self.covs), approx=approx + mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs), approx=approx ) fsum = gptot.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) model2_logp = model2.logp({"fsum": self.y}) @@ -946,24 +980,25 @@ def testAdditiveMarginalSparse(self, approx): with model2: fp2 = gptot.conditional("fp2", self.Xnew) - fp = np.random.randn(self.Xnew.shape[0], 1) - npt.assert_allclose( - pm.logp(fp1, fp1).eval({fp1: fp}), pm.logp(fp2, fp2).eval({fp2: fp}), atol=0, rtol=1e-2 - ) + fp = np.random.randn(self.Xnew.shape[0]) + + model1_logp = model1.logp({"fp1": fp, "fsum": self.y}) + model2_logp = model2.logp({"fp2": fp, "fsum": self.y}) + + npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) - @pytest.mark.xfail(reason="MvNormal was not yet refactored") def testAdditiveLatent(self): with pm.Model() as model1: - gp1 = pm.gp.Latent(self.means[0], self.covs[0]) - gp2 = pm.gp.Latent(self.means[1], self.covs[1]) - gp3 = pm.gp.Latent(self.means[2], self.covs[2]) + gp1 = pm.gp.Latent(mean_func=self.means[0], cov_func=self.covs[0]) + gp2 = pm.gp.Latent(mean_func=self.means[1], cov_func=self.covs[1]) + gp3 = pm.gp.Latent(mean_func=self.means[2], cov_func=self.covs[2]) gpsum = gp1 + gp2 + gp3 fsum = gpsum.prior("fsum", self.X, reparameterize=False) model1_logp = model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.Latent(reduce(add, self.means), reduce(add, self.covs)) + gptot = pm.gp.Latent(mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs)) fsum = gptot.prior("fsum", self.X, reparameterize=False) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) @@ -974,12 +1009,9 @@ def testAdditiveLatent(self): fp2 = 
gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose( - fp1.logp({"fsum": self.y, "fp1": fp}), - fp2.logp({"fsum": self.y, "fp2": fp}), - atol=0, - rtol=1e-2, - ) + logp1 = model1.logp({"fsum": self.y, "fp1": fp}) + logp2 = model2.logp({"fsum": self.y, "fp2": fp}) + npt.assert_allclose(logp1, logp2, atol=0, rtol=1e-2) def testAdditiveSparseRaises(self): # cant add different approximations @@ -1007,7 +1039,6 @@ def testAdditiveTypeRaises2(self): gp1 + gp2 -@pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestTP: R""" Compare TP with high degress of freedom to GP @@ -1015,41 +1046,41 @@ class TestTP: def setup_method(self): X = np.random.randn(20, 3) - y = np.random.randn(20) * 0.01 - Xnew = np.random.randn(50, 3) - pnew = np.random.randn(50) * 0.01 - with pm.Model() as model: + y = np.random.randn(20) + Xnew = np.random.randn(30, 3) + pnew = np.random.randn(30) + + with pm.Model() as model1: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = pm.gp.Latent(cov_func=cov_func) f = gp.prior("f", X, reparameterize=False) p = gp.conditional("p", Xnew) + self.gp_latent_logp = model1.logp({"f": y, "p": pnew}) self.X = X self.y = y self.Xnew = Xnew self.pnew = pnew - self.latent_logp = model.logp({"f": y, "p": pnew}) - self.plogp = p.logp({"f": y, "p": pnew}) + self.nu = 10000 def testTPvsLatent(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - tp = pm.gp.TP(cov_func=cov_func, nu=10000) + tp = pm.gp.TP(cov_func=cov_func, nu=self.nu) f = tp.prior("f", self.X, reparameterize=False) p = tp.conditional("p", self.Xnew) tp_logp = model.logp({"f": self.y, "p": self.pnew}) - npt.assert_allclose(self.latent_logp, tp_logp, atol=0, rtol=1e-2) + npt.assert_allclose(self.gp_latent_logp, tp_logp, atol=0, rtol=1e-2) def testTPvsLatentReparameterized(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - tp = pm.gp.TP(cov_func=cov_func, nu=10000) + tp = pm.gp.TP(cov_func=cov_func, nu=self.nu) f = tp.prior("f", self.X, reparameterize=True) p = tp.conditional("p", self.Xnew) chol = np.linalg.cholesky(cov_func(self.X).eval()) - y_rotated = np.linalg.solve(chol, self.y) - # testing full model logp unreliable due to introduction of f_chi2__log__ - plogp = p.logp({"f_rotated_": y_rotated, "p": self.pnew, "f_chi2__log__": np.log(1e20)}) - npt.assert_allclose(self.plogp, plogp, atol=0, rtol=1e-2) + f_rotated = np.linalg.solve(chol, self.y) + tp_logp = model.logp({"f_rotated_": f_rotated, "p": self.pnew}) + npt.assert_allclose(self.gp_latent_logp, tp_logp, atol=0, rtol=1e-2) def testAdditiveTPRaises(self): with pm.Model() as model: @@ -1060,7 +1091,6 @@ def testAdditiveTPRaises(self): gp1 + gp2 -@pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestLatentKron: """ Compare gp.LatentKron to gp.Latent, both with Gaussian noise. @@ -1077,7 +1107,7 @@ def setup_method(self): self.y = np.random.randn(self.N) * 0.1 self.Xnews = (np.random.randn(5, 1), np.random.randn(5, 1), np.random.randn(5, 1)) self.Xnew = np.concatenate(self.Xnews, axis=1) - self.pnew = np.random.randn(len(self.Xnew)) * 0.01 + self.pnew = np.random.randn(len(self.Xnew)) ls = 0.2 with pm.Model() as latent_model: self.cov_funcs = ( @@ -1116,7 +1146,6 @@ def testLatentKronRaisesSizes(self): gp.prior("f", Xs=[np.linspace(0, 1, 7)[:, None], np.linspace(0, 1, 5)[:, None]]) -@pytest.mark.xfail(reason="The `gp.predict` method was not yet refactored") class TestMarginalKron: """ Compare gp.MarginalKron to gp.Marginal. 
@@ -1134,7 +1163,8 @@ def setup_method(self): self.Xnews = (np.random.randn(5, 1), np.random.randn(5, 1), np.random.randn(5, 1)) self.Xnew = np.concatenate(self.Xnews, axis=1) self.sigma = 0.2 - self.pnew = np.random.randn(len(self.Xnew)) * 0.01 + self.pnew = np.random.randn(len(self.Xnew)) + ls = 0.2 with pm.Model() as model: self.cov_funcs = [ @@ -1153,16 +1183,19 @@ def setup_method(self): def testMarginalKronvsMarginalpredict(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) mu, cov = kron_gp.predict(self.Xnew) npt.assert_allclose(mu, self.mu, atol=1e-5, rtol=1e-2) npt.assert_allclose(cov, self.cov, atol=1e-5, rtol=1e-2) + with kron_model: + _, var = kron_gp.predict(self.Xnew, diag=True) + npt.assert_allclose(np.diag(cov), var, atol=1e-5, rtol=1e-2) def testMarginalKronvsMarginal(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) kron_logp = kron_model.logp({"p": self.pnew}) npt.assert_allclose(kron_logp, self.logp, atol=0, rtol=1e-2) @@ -1175,7 +1208,7 @@ def testMarginalKronRaises(self): gp1 + gp2 -class TestUtil: +class TestPlotGP: def test_plot_gp_dist(self): """Test that the plotting helper works with the stated input shapes.""" import matplotlib.pyplot as plt @@ -1204,29 +1237,54 @@ def test_plot_gp_dist_warn_nan(self): pass -class TestCircular: - def test_1d_tau1(self): - X = np.linspace(0, 1, 10)[:, None] - etalon = 0.600881 - with pm.Model(): - cov = pm.gp.cov.Circular(1, 1, tau=5) - K = aesara.function([], cov(X))() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = aesara.function([], cov(X, X))() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - # check diagonal - Kd = aesara.function([], cov(X, diag=True))() - npt.assert_allclose(np.diag(K), Kd, atol=1e-5) +class TestKmeansInducing: + def setup_method(self): + self.centers = (-5, 5) + self.x = np.concatenate( + (self.centers[0] + np.random.randn(500), self.centers[1] + np.random.randn(500)) + ) - def test_1d_tau2(self): - X = np.linspace(0, 1, 10)[:, None] - etalon = 0.691239 - with pm.Model(): - cov = pm.gp.cov.Circular(1, 1, tau=4) - K = aesara.function([], cov(X))() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = aesara.function([], cov(X, X))() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - # check diagonal - Kd = aesara.function([], cov(X, diag=True))() - npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + def test_kmeans(self): + X = self.x[:, None] + Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() + npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) + + X = at.as_tensor_variable(self.x[:, None]) + Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() + npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) + + def test_kmeans_raises(self): + with pytest.raises(TypeError): + Xu = pm.gp.util.kmeans_inducing_points(2, "str is the wrong type").flatten() + + +class TestReplaceWithValues: + def test_basic_replace(self): + with pm.Model() as model: + a = pm.Normal("a") + b = pm.Normal("b", mu=a) + c = a * b + + (c_val,) = 
pm.gp.util.replace_with_values( + [c], replacements={"a": 2, "b": 3, "x": 100}, model=model + ) + assert c_val == np.array(6.0) + + def test_replace_no_inputs_needed(self): + with pm.Model() as model: + a = at.as_tensor_variable(2.0) + b = 1.0 + a + c = a * b + (c_val,) = pm.gp.util.replace_with_values([c], replacements={"x": 100}) + assert c_val == np.array(6.0) + + def test_missing_input(self): + with pm.Model() as model: + a = pm.Normal("a") + b = pm.Normal("b", mu=a) + c = a * b + + with pytest.raises(ValueError): + (c_val,) = pm.gp.util.replace_with_values( + [c], replacements={"a": 2, "x": 100}, model=model + )
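The last two test classes exercise the `gp.util` helpers touched by this change set, `kmeans_inducing_points` and `replace_with_values`. A minimal usage sketch based on those tests (the data and replacement values are illustrative):

```python
import numpy as np
import pymc as pm

# k-means selection of inducing point locations from the input data.
x = np.concatenate((-5 + np.random.randn(500), 5 + np.random.randn(500)))
Xu = pm.gp.util.kmeans_inducing_points(2, x[:, None])

# Substitute values for named model variables and evaluate an expression.
with pm.Model() as model:
    a = pm.Normal("a")
    b = pm.Normal("b", mu=a)
    (c_val,) = pm.gp.util.replace_with_values(
        [a * b], replacements={"a": 2, "b": 3}, model=model
    )
assert c_val == 6.0
```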