From 984a1d04bce65c07e399a4ca1676476da472f440 Mon Sep 17 00:00:00 2001
From: bwengals
Date: Tue, 5 Oct 2021 10:46:27 -0700
Subject: [PATCH 01/32] np.int -> int, fix np DeprecationWarning

---
 pymc/gp/cov.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index ccbbe59b81e..bbeeb41026f 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -64,7 +64,7 @@ def __init__(self, input_dim, active_dims=None):
         if active_dims is None:
             self.active_dims = np.arange(input_dim)
         else:
-            self.active_dims = np.asarray(active_dims, np.int)
+            self.active_dims = np.asarray(active_dims, int)
 
     def __call__(self, X, Xs=None, diag=False):
         r"""

From bd916ae1ad08a888d9589f12ce20ea92b8396f57 Mon Sep 17 00:00:00 2001
From: bwengals
Date: Wed, 6 Oct 2021 14:22:10 -0700
Subject: [PATCH 02/32] remove shape arg from non-kron implementations, TODO for is_observed in Marginal, mark for deprecation

---
 pymc/gp/gp.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py
index 9f913474d46..4fe44ab030f 100644
--- a/pymc/gp/gp.py
+++ b/pymc/gp/gp.py
@@ -223,8 +223,7 @@ def conditional(self, name, Xnew, given=None, **kwargs):
         """
         givens = self._get_given_vals(given)
         mu, cov = self._build_conditional(Xnew, *givens)
-        shape = infer_shape(Xnew, kwargs.pop("shape", None))
-        return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs)
+        return pm.MvNormal(name, mu=mu, cov=cov, **kwargs)
 
 
 @conditioned_vars(["X", "f", "nu"])
@@ -341,8 +340,7 @@ def conditional(self, name, Xnew, **kwargs):
         X = self.X
         f = self.f
         nu2, mu, cov = self._build_conditional(Xnew, X, f)
-        shape = infer_shape(Xnew, kwargs.pop("shape", None))
-        return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, size=shape, **kwargs)
+        return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, **kwargs)
 
 
 @conditioned_vars(["X", "y", "noise"])
@@ -422,9 +420,6 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
         noise: scalar, Variable, or Covariance
            Standard deviation of the Gaussian noise. Can also be a Covariance for
            non-white noise.
-        is_observed: bool
-            Whether to set `y` as an `observed` variable in the `model`.
-            Default is `True`.
        **kwargs
            Extra keyword arguments that are passed to `MvNormal` distribution
            constructor.
@@ -436,11 +431,12 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
         self.X = X
         self.y = y
         self.noise = noise
+        # TODO: deprecate is_observed. untested, shouldn't be used.
if is_observed: return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs) else: - shape = infer_shape(X, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + # shape = infer_shape(X, kwargs.pop("shape", None)) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def _get_given_vals(self, given): if given is None: @@ -517,8 +513,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): R""" @@ -829,8 +824,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @conditioned_vars(["Xs", "f"]) From 490311faf4a713b22590515843fcd187fc85450c Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 6 Oct 2021 14:27:20 -0700 Subject: [PATCH 03/32] np.int -> int in gp/util.py --- pymc/gp/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index f47331248c1..3c32ab3c392 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -48,7 +48,7 @@ def infer_shape(X, n_points=None): """ if n_points is None: try: - n_points = np.int(X.shape[0]) + n_points = int(X.shape[0]) except TypeError: raise TypeError("Cannot infer 'shape', provide as an argument") return n_points From a64975d90c281ea918b34a143b9db96c4b3efbe8 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 7 Oct 2021 13:43:16 -0700 Subject: [PATCH 04/32] force all mean_func, cov_func args to GP constructors to be required kwargs (often default zero mean_func is used) --- pymc/gp/gp.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 4fe44ab030f..58a73b96c7c 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -41,7 +41,7 @@ class Base: Base class. """ - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0)): + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): self.mean_func = mean_func self.cov_func = cov_func @@ -122,8 +122,8 @@ class Latent(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0)): - super().__init__(mean_func, cov_func) + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): + super().__init__(mean_func=mean_func, cov_func=cov_func) def _build_prior(self, name, X, reparameterize=True, **kwargs): mu = self.mean_func(X) @@ -256,11 +256,11 @@ class TP(Latent): Processes as Alternatives to Gaussian Processes. arXiv preprint arXiv:1402.4306. 
""" - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), nu=None): + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0), nu=None): if nu is None: raise ValueError("Student's T process requires a degrees of freedom parameter, 'nu'") self.nu = nu - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Student's T processes aren't additive") @@ -638,7 +638,7 @@ def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), approx="FITC"): if approx not in self._available_approx: raise NotImplementedError(approx) self.approx = approx - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): # new_gp will default to FITC approx @@ -881,13 +881,13 @@ class LatentKron(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): + def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): try: self.cov_funcs = list(cov_funcs) except TypeError: self.cov_funcs = [cov_funcs] cov_func = pm.gp.cov.Kron(self.cov_funcs) - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") @@ -1037,13 +1037,13 @@ class MarginalKron(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): + def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): try: self.cov_funcs = list(cov_funcs) except TypeError: self.cov_funcs = [cov_funcs] cov_func = pm.gp.cov.Kron(self.cov_funcs) - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") From 77c4392c9c45488a8b236625644f335c2a6ae127 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 20 Oct 2021 14:15:14 -0700 Subject: [PATCH 05/32] fix predictt functions, rename to _predict_at. because theano -> aesara --- pymc/gp/gp.py | 17 +++++++---------- pymc/gp/util.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 58a73b96c7c..f1fc4bf2d83 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -27,6 +27,7 @@ cholesky, conditioned_vars, infer_shape, + replace_with_value, solve_lower, solve_upper, stabilize, @@ -540,12 +541,10 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): if given is None: given = {} - mu, cov = self.predictt(Xnew, diag, pred_noise, given) - # XXX: This needs to be refactored - # return draw_values([mu, cov], point=point) - return None, None + mu, cov = self._predict_at(Xnew, diag, pred_noise, given) + return replace_with_values([mu, cov], replacements=point) - def predictt(self, Xnew, diag=False, pred_noise=False, given=None): + def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -1199,12 +1198,10 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False): Whether or not observation noise is included in the conditional. Default is `False`. 
""" - mu, cov = self._build_conditional(Xnew, pred_noise, diag) - # XXX: This needs to be refactored - # return draw_values([mu, cov], point=point) - return None, None + mu, cov = self._predict_at(Xnew, pred_noise, diag) + return replace_with_values([mu, cov], replacements=point) - def predictt(self, Xnew, diag=False, pred_noise=False): + def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 3c32ab3c392..29edfcf3901 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,9 +14,12 @@ import warnings +from typing import Dict + import aesara.tensor as at import numpy as np +from aesara.compile import SharedVariable from aesara.tensor.slinalg import ( # noqa: W0611; pylint: disable=unused-import cholesky, solve, @@ -30,6 +33,37 @@ from aesara.tensor.var import TensorConstant from scipy.cluster.vq import kmeans +from pymc.aesaraf import compile_rv_inplace, walk_model + + +def replace_with_value(vars_needed, replacements: Dict): + R""" + Replace random variable nodes in the graph with values given in replacements. + + NOTE TO REVIEWER: Modified this from `sample_posterior_predictive`. Is there a better way to do this? + """ + inputs, input_names = [], [] + for rv in walk_model(vars_needed, walk_past_rvs=True): + if rv in model.named_vars.values() and not isinstance(rv, SharedVariable): + inputs.append(rv) + input_names.append(rv.name) + + fn = compile_rv_inplace( + inputs, + vars_needed, + allow_input_downcast=True, + accept_inplace=True, + on_unused_input="ignore", + ) + + replacements = {name: val for name, val in replacements.items() if name in input_names} + missing = set(needed) - set(replacements.keys()) + if len(missing) > 0: + missing_str = ", ".join(missing) + raise ValueError(f"Values for {missing_str} must be included in `replacements`.") + + return fn(**replacements) + def infer_shape(X, n_points=None): R""" From d1e09fed62c323acb19c32a0c5b71e4473ef67e9 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 16:26:41 -0700 Subject: [PATCH 06/32] fix TP tests, force mean_func, cov_func to be req kwarg --- pymc/tests/test_gp.py | 44 +++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 424e87e5e8b..d57adbabccb 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -767,7 +767,7 @@ def test_raises3(self): B = pm.gp.cov.Coregion(1) -@pytest.mark.xfail(reason="MvNormal was not yet refactored") +# @pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestMarginalVsLatent: R""" Compare the logp of models Marginal, noise=0 and Latent. 
@@ -781,7 +781,7 @@ def setup_method(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Marginal(mean_func, cov_func) + gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func) f = gp.marginal_likelihood("f", X, y, noise=0.0, is_observed=False, observed=y) p = gp.conditional("p", Xnew) self.logp = model.logp({"p": pnew}) @@ -794,7 +794,7 @@ def testLatent1(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Latent(mean_func, cov_func) + gp = pm.gp.Latent(mean_func=mean_func, cov_func=cov_func) f = gp.prior("f", self.X, reparameterize=False) p = gp.conditional("p", self.Xnew) latent_logp = model.logp({"f": self.y, "p": self.pnew}) @@ -804,7 +804,7 @@ def testLatent2(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Latent(mean_func, cov_func) + gp = pm.gp.Latent(mean_func=mean_func, cov_func=cov_func) f = gp.prior("f", self.X, reparameterize=True) p = gp.conditional("p", self.Xnew) chol = np.linalg.cholesky(cov_func(self.X).eval()) @@ -828,7 +828,7 @@ def setup_method(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Marginal(mean_func, cov_func) + gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func) sigma = 0.1 f = gp.marginal_likelihood("f", X, y, noise=sigma) p = gp.conditional("p", Xnew) @@ -845,7 +845,7 @@ def testApproximations(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx=approx) + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) p = gp.conditional("p", self.Xnew) approx_logp = model.logp({"f": self.y, "p": self.pnew}) @@ -856,7 +856,7 @@ def testPredictVar(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx=approx) + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) mu1, var1 = self.gp.predict(self.Xnew, diag=True) mu2, var2 = gp.predict(self.Xnew, diag=True) @@ -867,7 +867,7 @@ def testPredictCov(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx="DTC") + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx="DTC") f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) @@ -888,7 +888,7 @@ def setup_method(self): ) self.means = (pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5)) - @pytest.mark.xfail(reason="MvNormal was not yet refactored") + # @pytest.mark.xfail(reason="MvNormal was not yet refactored") def testAdditiveMarginal(self): with pm.Model() as model1: gp1 = pm.gp.Marginal(self.means[0], self.covs[0]) @@ -1007,7 +1007,7 @@ def testAdditiveTypeRaises2(self): gp1 + gp2 -@pytest.mark.xfail(reason="MvNormal was not yet refactored") +# @pytest.mark.xfail(reason="MvNormal was not yet 
refactored") class TestTP: R""" Compare TP with high degress of freedom to GP @@ -1015,9 +1015,9 @@ class TestTP: def setup_method(self): X = np.random.randn(20, 3) - y = np.random.randn(20) * 0.01 - Xnew = np.random.randn(50, 3) - pnew = np.random.randn(50) * 0.01 + y = np.random.randn(20) + Xnew = np.random.randn(30, 3) + pnew = np.random.randn(30) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = pm.gp.Latent(cov_func=cov_func) @@ -1027,29 +1027,29 @@ def setup_method(self): self.y = y self.Xnew = Xnew self.pnew = pnew - self.latent_logp = model.logp({"f": y, "p": pnew}) - self.plogp = p.logp({"f": y, "p": pnew}) + self.nu = 1000 + self.gp_latent_logp = model.logp({"f": y, "p": pnew}) def testTPvsLatent(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - tp = pm.gp.TP(cov_func=cov_func, nu=10000) + tp = pm.gp.TP(cov_func=cov_func, nu=self.nu) f = tp.prior("f", self.X, reparameterize=False) p = tp.conditional("p", self.Xnew) tp_logp = model.logp({"f": self.y, "p": self.pnew}) - npt.assert_allclose(self.latent_logp, tp_logp, atol=0, rtol=1e-2) + npt.assert_allclose(self.gp_latent_logp, tp_logp, atol=0, rtol=1e-2) def testTPvsLatentReparameterized(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - tp = pm.gp.TP(cov_func=cov_func, nu=10000) + tp = pm.gp.TP(cov_func=cov_func, nu=self.nu) f = tp.prior("f", self.X, reparameterize=True) p = tp.conditional("p", self.Xnew) chol = np.linalg.cholesky(cov_func(self.X).eval()) - y_rotated = np.linalg.solve(chol, self.y) - # testing full model logp unreliable due to introduction of f_chi2__log__ - plogp = p.logp({"f_rotated_": y_rotated, "p": self.pnew, "f_chi2__log__": np.log(1e20)}) - npt.assert_allclose(self.plogp, plogp, atol=0, rtol=1e-2) + f_rotated = np.linalg.solve(chol, self.y) + + tp_logp = model.logp({"f_rotated_": f_rotated, "p": self.pnew}) + npt.assert_allclose(self.gp_latent_logp, tp_logp, atol=0, rtol=1e-2) def testAdditiveTPRaises(self): with pm.Model() as model: From 31150c34fe0a6e849ddf69fb3fa65353baf06254 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 16:27:51 -0700 Subject: [PATCH 07/32] fix TP reparameterization to sample studentt instead of chi2/norm --- pymc/gp/gp.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index f1fc4bf2d83..1553a5e0cf8 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -26,7 +26,7 @@ from pymc.gp.util import ( cholesky, conditioned_vars, - infer_shape, + infer_size, replace_with_value, solve_lower, solve_upper, @@ -129,12 +129,13 @@ def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): def _build_prior(self, name, X, reparameterize=True, **kwargs): mu = self.mean_func(X) cov = stabilize(self.cov_func(X)) - shape = infer_shape(X, kwargs.pop("shape", None)) if reparameterize: - v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=shape, **kwargs) + size = infer_size(X, kwargs.pop("size", None)) + print("_build_prior:size", size) + v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=size, **kwargs) f = pm.Deterministic(name, mu + cholesky(cov).dot(v)) else: - f = pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + f = pm.MvNormal(name, mu=mu, cov=cov, **kwargs) return f def prior(self, name, X, reparameterize=True, **kwargs): @@ -269,13 +270,12 @@ def __add__(self, other): def _build_prior(self, name, X, reparameterize=True, **kwargs): mu = self.mean_func(X) cov = 
stabilize(self.cov_func(X)) - shape = infer_shape(X, kwargs.pop("shape", None)) if reparameterize: - chi2 = pm.ChiSquared(name + "_chi2_", self.nu) - v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=shape, **kwargs) - f = pm.Deterministic(name, (at.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))) + size = infer_size(X, kwargs.pop("size", None)) + v = pm.StudentT(name + "_rotated_", mu=0.0, sigma=1.0, nu=self.nu, size=size, **kwargs) + f = pm.Deterministic(name, mu + cholesky(cov).dot(v)) else: - f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, size=shape, **kwargs) + f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, **kwargs) return f def prior(self, name, X, reparameterize=True, **kwargs): @@ -436,7 +436,7 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): if is_observed: return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs) else: - # shape = infer_shape(X, kwargs.pop("shape", None)) + # size = infer_size(X, kwargs.pop("size", None)) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def _get_given_vals(self, given): @@ -974,8 +974,8 @@ def conditional(self, name, Xnew, **kwargs): constructor. """ mu, cov = self._build_conditional(Xnew) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + size = infer_size(Xnew, kwargs.pop("size", None)) + return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) @conditioned_vars(["Xs", "y", "sigma"]) @@ -1098,8 +1098,8 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): if is_observed: return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, observed=y, **kwargs) else: - shape = np.prod([len(X) for X in Xs]) - return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=shape, **kwargs) + size = np.prod([len(X) for X in Xs]) + return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) def _build_conditional(self, Xnew, pred_noise, diag): Xs, y, sigma = self.Xs, self.y, self.sigma @@ -1175,8 +1175,8 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): constructor. """ mu, cov = self._build_conditional(Xnew, pred_noise, False) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + size = infer_size(Xnew, kwargs.pop("size", None)) + return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False): R""" From 03c2a13d7b58b6edd7da705c20746de2d24782e9 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 16:29:25 -0700 Subject: [PATCH 08/32] change naming shape->size where appropriate --- pymc/gp/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 29edfcf3901..a5e18af53f5 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -65,9 +65,9 @@ def replace_with_value(vars_needed, replacements: Dict): return fn(**replacements) -def infer_shape(X, n_points=None): +def infer_size(X, n_points=None): R""" - Maybe attempt to infer the shape of a Gaussian process input matrix. + Maybe attempt to infer the size, or N, of a Gaussian process input matrix. If a specific shape cannot be inferred, for instance if X is symbolic, then an error is raised. 
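A minimal usage sketch (an illustration, not one of the patches in this series): after the keyword-only constructor change in PATCH 04 and the infer_shape -> infer_size rename in PATCH 08, building a GP and querying the input size reads roughly as below. The data, lengthscale, and model here are invented for the example.

import numpy as np
import pymc as pm
from pymc.gp.util import infer_size

X = np.linspace(0, 10, 50)[:, None]  # 50 input points, 1 feature column

with pm.Model():
    cov_func = pm.gp.cov.ExpQuad(1, ls=1.0)
    # mean_func/cov_func must now be passed by keyword; positional use raises TypeError
    gp = pm.gp.Latent(cov_func=cov_func)
    f = gp.prior("f", X=X)

assert infer_size(X) == 50  # the helper formerly named infer_shape
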
From 4bcb448985470b4ae9eb5c1e73d0f39f2840c321 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 21:32:25 -0700 Subject: [PATCH 09/32] add deprecation warning for is_observed --- pymc/gp/gp.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 1553a5e0cf8..775d0288ffd 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -432,11 +432,14 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): self.X = X self.y = y self.noise = noise - # TODO: deprecate is_observed. untested, shouldnt be used. if is_observed: return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs) else: - # size = infer_size(X, kwargs.pop("size", None)) + warnings.warn( + "The 'is_observed' argument has been deprecated. If the GP is " + "unobserved use gp.Latent instead.", + DeprecationWarning, + ) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def _get_given_vals(self, given): @@ -736,6 +739,11 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw **kwargs, ) else: + warnings.warn( + "The 'is_observed' argument has been deprecated. If the GP is " + "unobserved use gp.Latent instead.", + DeprecationWarning, + ) return pm.DensityDist( name, X, From 167cb4ea20703e1531447c6cfeccf7c61bbd3cf4 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 22:00:02 -0700 Subject: [PATCH 10/32] add jitter arg for covs headed for cholesky decomps, previously fixed at 1e-6. add deprecation warning for is_observed arg --- pymc/gp/gp.py | 120 +++++++++++++++++++++++++++++++----------------- pymc/gp/util.py | 8 ++-- 2 files changed, 83 insertions(+), 45 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 775d0288ffd..8b75bd0f2ec 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -33,6 +33,7 @@ stabilize, ) from pymc.math import cartesian, kron_diag, kron_dot, kron_solve_lower, kron_solve_upper +JITTER_DEFAULT = 1e-6 __all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] @@ -126,9 +127,9 @@ class Latent(Base): def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): super().__init__(mean_func=mean_func, cov_func=cov_func) - def _build_prior(self, name, X, reparameterize=True, **kwargs): + def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): mu = self.mean_func(X) - cov = stabilize(self.cov_func(X)) + cov = stabilize(self.cov_func(X), jitter) if reparameterize: size = infer_size(X, kwargs.pop("size", None)) print("_build_prior:size", size) @@ -138,7 +139,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs): f = pm.MvNormal(name, mu=mu, cov=cov, **kwargs) return f - def prior(self, name, X, reparameterize=True, **kwargs): + def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the GP prior distribution evaluated over the input locations `X`. @@ -159,11 +160,14 @@ def prior(self, name, X, reparameterize=True, **kwargs): reparameterize: bool Reparameterize the distribution by rotating the random variable by the Cholesky factor of the covariance matrix. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to distribution constructor. 
""" - f = self._build_prior(name, X, reparameterize, **kwargs) + f = self._build_prior(name, X, reparameterize, jitter, **kwargs) self.X = X self.f = f return f @@ -183,10 +187,10 @@ def _get_given_vals(self, given): X, f = self.X, self.f return X, f, cov_total, mean_total - def _build_conditional(self, Xnew, X, f, cov_total, mean_total): + def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) - L = cholesky(stabilize(Kxx)) + L = cholesky(stabilize(Kxx, jitter)) A = solve_lower(L, Kxs) v = solve_lower(L, f - mean_total(X)) mu = self.mean_func(Xnew) + at.dot(at.transpose(A), v) @@ -194,7 +198,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total): cov = Kss - at.dot(at.transpose(A), A) return mu, cov - def conditional(self, name, Xnew, given=None, **kwargs): + def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -219,12 +223,15 @@ def conditional(self, name, Xnew, given=None, **kwargs): Can optionally take as key value pairs: `X`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, *givens) + mu, cov = self._build_conditional(Xnew, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -267,9 +274,9 @@ def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0), nu=None): def __add__(self, other): raise TypeError("Student's T processes aren't additive") - def _build_prior(self, name, X, reparameterize=True, **kwargs): + def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): mu = self.mean_func(X) - cov = stabilize(self.cov_func(X)) + cov = stabilize(self.cov_func(X), jitter) if reparameterize: size = infer_size(X, kwargs.pop("size", None)) v = pm.StudentT(name + "_rotated_", mu=0.0, sigma=1.0, nu=self.nu, size=size, **kwargs) @@ -278,7 +285,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs): f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, **kwargs) return f - def prior(self, name, X, reparameterize=True, **kwargs): + def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the TP prior distribution evaluated over the input locations `X`. @@ -299,7 +306,7 @@ def prior(self, name, X, reparameterize=True, **kwargs): Extra keyword arguments that are passed to distribution constructor. """ - f = self._build_prior(name, X, reparameterize, **kwargs) + f = self._build_prior(name, X, reparameterize, jitter, **kwargs) self.X = X self.f = f return f @@ -318,7 +325,7 @@ def _build_conditional(self, Xnew, X, f): covT = (self.nu + beta - 2) / (nu2 - 2) * cov return nu2, mu, covT - def conditional(self, name, Xnew, **kwargs): + def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -333,6 +340,9 @@ def conditional(self, name, Xnew, **kwargs): Name of the random variable Xnew: array-like Function input values. 
+ jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -340,7 +350,7 @@ def conditional(self, name, Xnew, **kwargs): X = self.X f = self.f - nu2, mu, cov = self._build_conditional(Xnew, X, f) + nu2, mu, cov = self._build_conditional(Xnew, X, f, jitter) return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, **kwargs) @@ -397,7 +407,7 @@ def _build_marginal_likelihood(self, X, noise): cov = Kxx + Knx return mu, cov - def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): + def marginal_likelihood(self, name, X, y, noise, jitter=JITTER_DEFAULT, is_observed=True, **kwargs): R""" Returns the marginal likelihood distribution, given the input locations `X` and the data `y`. @@ -421,6 +431,9 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): noise: scalar, Variable, or Covariance Standard deviation of the Gaussian noise. Can also be a Covariance for non-white noise. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -428,7 +441,7 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): if not isinstance(noise, Covariance): noise = pm.gp.cov.WhiteNoise(noise) - mu, cov = self._build_marginal_likelihood(X, noise) + mu, cov = self._build_marginal_likelihood(X, noise, jitter) self.X = X self.y = y self.noise = noise @@ -460,12 +473,12 @@ def _get_given_vals(self, given): X, y, noise = self.X, self.y, self.noise return X, y, noise, cov_total, mean_total - def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total): + def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total, jitter): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) Knx = noise(X) rxx = y - mean_total(X) - L = cholesky(stabilize(Kxx) + Knx) + L = cholesky(stabilize(Kxx, jitter) + Knx) A = solve_lower(L, Kxs) v = solve_lower(L, rxx) mu = self.mean_func(Xnew) + at.dot(at.transpose(A), v) @@ -480,9 +493,9 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mea cov = Kss - at.dot(at.transpose(A), A) if pred_noise: cov += noise(Xnew) - return mu, cov if pred_noise else stabilize(cov) + return mu, cov if pred_noise else stabilize(cov, jitter) - def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -510,16 +523,19 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): Can optionally take as key value pairs: `X`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
""" givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) + mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): + def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -540,14 +556,17 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): Default is `False`. given: dict Same as `conditional` method. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. """ if given is None: given = {} - mu, cov = self._predict_at(Xnew, diag, pred_noise, given) + mu, cov = self._predict_at(Xnew, diag, pred_noise, given, jitter) return replace_with_values([mu, cov], replacements=point) - def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None): + def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -654,11 +673,11 @@ def __add__(self, other): # Use y as first argument, so that we can use functools.partial # in marginal_likelihood instead of lambda. This makes pickling # possible. - def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): + def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter): sigma2 = at.square(sigma) Kuu = self.cov_func(Xu) Kuf = self.cov_func(Xu, X) - Luu = cholesky(stabilize(Kuu)) + Luu = cholesky(stabilize(Kuu, jitter)) A = solve_lower(Luu, Kuf) Qffd = at.sum(A * A, 0) if self.approx == "FITC": @@ -683,7 +702,7 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) - def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kwargs): + def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise @@ -706,6 +725,9 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw is_observed: bool Whether to set `y` as an `observed` variable in the `model`. Default is `True`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
@@ -732,6 +754,7 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw X, Xu, self.sigma, + jitter, logp=self._build_marginal_likelihood_logp, observed=y, ndims_params=[2, 2, 0], @@ -749,6 +772,7 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw X, Xu, self.sigma, + jitter, logp=self._build_marginal_likelihood_logp, observed=y, ndims_params=[2, 2, 0], @@ -757,11 +781,11 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw **kwargs, ) - def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total): + def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total, jitter): sigma2 = at.square(sigma) Kuu = cov_total(Xu) Kuf = cov_total(Xu, X) - Luu = cholesky(stabilize(Kuu)) + Luu = cholesky(stabilize(Kuu, jitter)) A = solve_lower(Luu, Kuf) Qffd = at.sum(A * A, 0) if self.approx == "FITC": @@ -788,7 +812,7 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, cov = self.cov_func(Xnew) - at.dot(at.transpose(As), As) + at.dot(at.transpose(C), C) if pred_noise: cov += sigma2 * at.identity_like(cov) - return mu, cov if pred_noise else stabilize(cov) + return mu, cov if pred_noise else stabilize(cov, jitter) def _get_given_vals(self, given): if given is None: @@ -805,7 +829,7 @@ def _get_given_vals(self, given): X, Xu, y, sigma = self.X, self.Xu, self.y, self.sigma return X, Xu, y, sigma, cov_total, mean_total - def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. @@ -824,13 +848,16 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): Can optionally take as key value pairs: `X`, `Xu`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) + mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -899,16 +926,16 @@ def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") - def _build_prior(self, name, Xs, **kwargs): + def _build_prior(self, name, Xs, jitter, **kwargs): self.N = np.prod([len(X) for X in Xs]) mu = self.mean_func(cartesian(*Xs)) - chols = [cholesky(stabilize(cov(X))) for cov, X in zip(self.cov_funcs, Xs)] + chols = [cholesky(stabilize(cov(X, jitter))) for cov, X in zip(self.cov_funcs, Xs)] # remove reparameterization option v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=self.N, **kwargs) f = pm.Deterministic(name, mu + at.flatten(kron_dot(chols, v))) return f - def prior(self, name, Xs, **kwargs): + def prior(self, name, Xs, jitter, **kwargs): """ Returns the prior distribution evaluated over the input locations `Xs`. @@ -922,22 +949,25 @@ def prior(self, name, Xs, **kwargs): must be passable to its respective covariance without error. 
The total covariance function is measured on the full grid `cartesian(*Xs)`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to the `KroneckerNormal` distribution constructor. """ if len(Xs) != len(self.cov_funcs): raise ValueError("Must provide a covariance function for each X") - f = self._build_prior(name, Xs, **kwargs) + f = self._build_prior(name, Xs, jitter, **kwargs) self.Xs = Xs self.f = f return f - def _build_conditional(self, Xnew): + def _build_conditional(self, Xnew, jitter): Xs, f = self.Xs, self.f X = cartesian(*Xs) delta = f - self.mean_func(X) - covs = [stabilize(cov(Xi)) for cov, Xi in zip(self.cov_funcs, Xs)] + covs = [stabilize(cov(Xi), jitter) for cov, Xi in zip(self.cov_funcs, Xs)] chols = [cholesky(cov) for cov in covs] cholTs = [at.transpose(chol) for chol in chols] Kss = self.cov_func(Xnew) @@ -947,10 +977,10 @@ def _build_conditional(self, Xnew): alpha = kron_solve_upper(cholTs, alpha) mu = at.dot(Ksx, alpha).ravel() + self.mean_func(Xnew) A = kron_solve_lower(chols, Kxs) - cov = stabilize(Kss - at.dot(at.transpose(A), A)) + cov = stabilize(Kss - at.dot(at.transpose(A), A), jitter) return mu, cov - def conditional(self, name, Xnew, **kwargs): + def conditional(self, name, Xnew, jitter, **kwargs): """ Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -977,11 +1007,14 @@ def conditional(self, name, Xnew, **kwargs): Xnew: array-like Function input values. If one-dimensional, must be a column vector with shape `(n, 1)`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ - mu, cov = self._build_conditional(Xnew) + mu, cov = self._build_conditional(Xnew, jitter) size = infer_size(Xnew, kwargs.pop("size", None)) return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) @@ -1106,6 +1139,11 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): if is_observed: return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, observed=y, **kwargs) else: + warnings.warn( + "The 'is_observed' argument has been deprecated. If the GP is " + "unobserved use gp.LatentKron instead.", + DeprecationWarning, + ) size = np.prod([len(X) for X in Xs]) return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index a5e18af53f5..462f84355c8 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -88,22 +88,22 @@ def infer_size(X, n_points=None): return n_points -def stabilize(K, c=1e-6): +def stabilize(K, jitter=1e-6): R""" Adds small diagonal to a covariance matrix. Often the matrices calculated from covariance functions, `K = cov_func(X)` do not appear numerically to be positive semi-definite. Adding a small - correction, `c`, to the diagonal is usually enough to fix this. + correction, `jitter`, to the diagonal is usually enough to fix this. Parameters ---------- K: array-like A square covariance or kernel matrix. - c: float + jitter: float A small constant. 
""" - return K + c * at.identity_like(K) + return K + jitter * at.identity_like(K) def kmeans_inducing_points(n_inducing, X): From e18b8c47d5395a6a4c32a9f0061198bc8680c161 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 22:11:34 -0700 Subject: [PATCH 11/32] clean up trivial aesara.function usage to .eval() instead --- pymc/tests/test_gp.py | 224 +++++++++++++++++++++--------------------- 1 file changed, 112 insertions(+), 112 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index d57adbabccb..297ab35df0c 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -34,7 +34,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: zero_mean = pm.gp.mean.Zero() - M = aesara.function([], zero_mean(X))() + M = zero_mean(X).eval() assert np.all(M == 0) assert M.shape == (10,) @@ -44,7 +44,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: const_mean = pm.gp.mean.Constant(6) - M = aesara.function([], const_mean(X))() + M = const_mean(X).eval() assert np.all(M == 6) assert M.shape == (10,) @@ -54,7 +54,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: linear_mean = pm.gp.mean.Linear(2, 0.5) - M = aesara.function([], linear_mean(X))() + M = linear_mean.eval() npt.assert_allclose(M[1], 0.7222, atol=1e-3) assert M.shape == (10,) @@ -66,7 +66,7 @@ def test_add(self): mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5) mean2 = pm.gp.mean.Constant(2) mean = mean1 + mean2 + mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 0.7222 + 2 + 2, atol=1e-3) def test_prod(self): @@ -75,7 +75,7 @@ def test_prod(self): mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5) mean2 = pm.gp.mean.Constant(2) mean = mean1 * mean2 * mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 0.7222 * 2 * 2, atol=1e-3) def test_add_multid(self): @@ -86,7 +86,7 @@ def test_add_multid(self): mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b) mean2 = pm.gp.mean.Constant(2) mean = mean1 + mean2 + mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 10.8965 + 2 + 2, atol=1e-3) def test_prod_multid(self): @@ -97,7 +97,7 @@ def test_prod_multid(self): mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b) mean2 = pm.gp.mean.Constant(2) mean = mean1 * mean2 * mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 10.8965 * 2 * 2, atol=1e-3) @@ -108,10 +108,10 @@ def test_symadd_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 + cov2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightadd_scalar(self): @@ -119,10 +119,10 @@ def test_rightadd_scalar(self): with pm.Model() as model: a = 1 cov = pm.gp.cov.ExpQuad(1, 0.1) + a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftadd_scalar(self): @@ -130,10 +130,10 @@ def test_leftadd_scalar(self): with pm.Model() as model: a = 1 cov = a + pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1.53940, 
atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightadd_matrix(self): @@ -141,10 +141,10 @@ def test_rightadd_matrix(self): M = 2 * np.ones((10, 10)) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) + M - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftadd_matrixt(self): @@ -152,10 +152,10 @@ def test_leftadd_matrixt(self): M = 2 * at.ones((10, 10)) with pm.Model() as model: cov = M + pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_matrix(self): @@ -164,8 +164,8 @@ def test_leftprod_matrix(self): with pm.Model() as model: cov = M + pm.gp.cov.ExpQuad(1, 0.1) cov_true = pm.gp.cov.ExpQuad(1, 0.1) + M - K = aesara.function([], cov(X))() - K_true = aesara.function([], cov_true(X))() + K = cov(X).eval() + K_true = cov_true(X).eval() assert np.allclose(K, K_true) def test_inv_rightadd(self): @@ -181,10 +181,10 @@ def test_symprod_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 * cov2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightprod_scalar(self): @@ -192,10 +192,10 @@ def test_rightprod_scalar(self): with pm.Model() as model: a = 2 cov = pm.gp.cov.ExpQuad(1, 0.1) * a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_scalar(self): @@ -203,10 +203,10 @@ def test_leftprod_scalar(self): with pm.Model() as model: a = 2 cov = a * pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightprod_matrix(self): @@ -214,10 +214,10 @@ def test_rightprod_matrix(self): M = 2 * np.ones((10, 10)) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) * M - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_matrix(self): @@ -226,8 +226,8 @@ def test_leftprod_matrix(self): with pm.Model() as model: cov = M * pm.gp.cov.ExpQuad(1, 0.1) cov_true = pm.gp.cov.ExpQuad(1, 0.1) * M - K = aesara.function([], cov(X))() - K_true = aesara.function([], cov_true(X))() + K = cov(X).eval() + K_true = cov_true(X).eval() assert np.allclose(K, K_true) def test_multiops(self): @@ -244,12 +244,12 @@ def test_multiops(self): + pm.gp.cov.ExpQuad(1, 0.1) + 3 ) - K1 = aesara.function([], cov1(X))() - K2 = aesara.function([], cov2(X))() + K1 = 
cov1(X).eval() + K2 = cov2(X).eval() assert np.allclose(K1, K2) # check diagonal - K1d = aesara.function([], cov1(X, diag=True))() - K2d = aesara.function([], cov2(X, diag=True))() + K1d = cov1(X, diag=True).eval() + K2d = cov2(X, diag=True).eval() npt.assert_allclose(np.diag(K1), K2d, atol=1e-5) npt.assert_allclose(np.diag(K2), K1d, atol=1e-5) @@ -265,10 +265,10 @@ def test_symexp_cov(self): with pm.Model() as model: cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 ** 2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_numpy(self): @@ -276,10 +276,10 @@ def test_covexp_numpy(self): with pm.Model() as model: a = np.array([[2]]) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_aesara(self): @@ -287,10 +287,10 @@ def test_covexp_aesara(self): with pm.Model() as model: a = at.alloc(2.0, 1, 1) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_shared(self): @@ -298,10 +298,10 @@ def test_covexp_shared(self): with pm.Model() as model: a = aesara.shared(2.0) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_invalid_covexp(self): @@ -321,11 +321,11 @@ def test_symprod_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1 * 0.53940, atol=1e-3) npt.assert_allclose(K[0, 11], 0.53940 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_multiops(self): @@ -342,8 +342,8 @@ def test_multiops(self): ) cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K_true = kronecker(aesara.function([], cov1(X1))(), aesara.function([], cov2(X2))()).eval() - K = aesara.function([], cov(X))() + K_true = kronecker(cov1(X1).eval(), cov2(X2).eval()).eval() + K = cov(X).eval() npt.assert_allclose(K_true, K) @@ -352,30 +352,30 @@ def test_slice1(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, 0.1, active_dims=[0, 0, 1]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.20084298, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_slice2(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1, 2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 
0.34295549, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_slice3(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1, 2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_diffslice(self): @@ -384,10 +384,10 @@ def test_diffslice(self): cov = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 0, 0]) + pm.gp.cov.ExpQuad( 3, ls=[0.1, 0.2, 0.3] ) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.683572, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -402,7 +402,7 @@ def test_stable(self): X = np.random.uniform(low=320.0, high=400.0, size=[2000, 2]) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.1) - dists = aesara.function([], cov.square_dist(X, X))() + dists = cov.square_dist(X, X).eval() assert not np.any(dists < 0) @@ -411,44 +411,44 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_2d(self): X = np.linspace(0, 1, 10).reshape(5, 2) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.820754, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_2dard(self): X = np.linspace(0, 1, 10).reshape(5, 2) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, np.array([1, 2])) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.969607, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_inv_lengthscale(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, ls_inv=10) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -457,14 +457,14 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.WhiteNoise(sigma=0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) npt.assert_allclose(K[0, 0], 0.5 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) # check predict - K = aesara.function([], cov(X, X))() + K = cov(X, 
X).eval() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) # white noise predicting should return all zeros npt.assert_allclose(K[0, 0], 0.0, atol=1e-3) @@ -475,14 +475,14 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Constant(2.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.5, atol=1e-3) npt.assert_allclose(K[0, 0], 2.5, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 2.5, atol=1e-3) npt.assert_allclose(K[0, 0], 2.5, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -491,12 +491,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.RatQuad(1, ls=0.1, alpha=0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -505,12 +505,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Exponential(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -519,12 +519,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern52(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -533,12 +533,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern32(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -547,11 +547,11 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern12(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3) - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -560,12 +560,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Cosine(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.766, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.766, atol=1e-3) # check 
diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -574,12 +574,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Periodic(1, 0.1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -588,12 +588,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Linear(1, 0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -602,12 +602,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Polynomial(1, 0.5, 2, 0) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -621,12 +621,12 @@ def warp_func(x, a, b, c): with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) cov = pm.gp.cov.WarpedInput(1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -646,12 +646,12 @@ def tanh_func(x, x1, x2, w, x0): with pm.Model() as model: cov = pm.gp.cov.Gibbs(1, tanh_func, args=(0.05, 0.6, 0.4, 1.0)) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -673,12 +673,12 @@ def scaling_func(x, a, b): with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) cov = pm.gp.cov.ScaledCov(1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -1210,12 +1210,12 @@ def test_1d_tau1(self): etalon = 0.600881 with pm.Model(): cov = pm.gp.cov.Circular(1, 1, tau=5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - 
K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_1d_tau2(self): @@ -1223,10 +1223,10 @@ def test_1d_tau2(self): etalon = 0.691239 with pm.Model(): cov = pm.gp.cov.Circular(1, 1, tau=4) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) From 212fb1ebcdfce85f06476c999af3d0ef577aab16 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 00:19:36 -0700 Subject: [PATCH 12/32] fix gp.util.replace_with_values to handle case with no symbolic values, .eval() works --- pymc/gp/util.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 462f84355c8..39fe62e9bea 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,7 +14,7 @@ import warnings -from typing import Dict +from typing import Dict, Optional import aesara.tensor as at import numpy as np @@ -36,11 +36,14 @@ from pymc.aesaraf import compile_rv_inplace, walk_model -def replace_with_value(vars_needed, replacements: Dict): +# TODO: add test +def replace_with_values(model, vars_needed, replacements=None): R""" - Replace random variable nodes in the graph with values given in replacements. - + Replace random variable nodes in the graph with values given by the replacements dict. + Uses untransformed versions of the inputs, performs some basic input validation. + NOTE TO REVIEWER: Modified this from `sample_posterior_predictive`. Is there a better way to do this? 
+ """ inputs, input_names = [], [] for rv in walk_model(vars_needed, walk_past_rvs=True): @@ -48,6 +51,10 @@ def replace_with_value(vars_needed, replacements: Dict): inputs.append(rv) input_names.append(rv.name) + # then deterministic, no inputs are required, can eval and return + if len(inputs) == 0: + return tuple([v.eval() for v in vars_needed]) + fn = compile_rv_inplace( inputs, vars_needed, @@ -57,9 +64,9 @@ def replace_with_value(vars_needed, replacements: Dict): ) replacements = {name: val for name, val in replacements.items() if name in input_names} - missing = set(needed) - set(replacements.keys()) + missing = set(vars_needed) - set(replacements.keys()) if len(missing) > 0: - missing_str = ", ".join(missing) + missing_str = ", ".join({m.name for m in missing}) raise ValueError(f"Values for {missing_str} must be included in `replacements`.") return fn(**replacements) From ad4a1cabeefc88f963054669aef6489357e26e56 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 00:20:52 -0700 Subject: [PATCH 13/32] jitter=0 for conditonals/predicts, fix replace_with_values calls --- pymc/gp/gp.py | 71 +++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 8b75bd0f2ec..9e2a8d318a9 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -27,12 +27,15 @@ cholesky, conditioned_vars, infer_size, - replace_with_value, + replace_with_values, solve_lower, solve_upper, stabilize, ) from pymc.math import cartesian, kron_diag, kron_dot, kron_solve_lower, kron_solve_upper +from pymc.distributions.distribution import NoDistribution +from pymc.model import modelcontext + JITTER_DEFAULT = 1e-6 __all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] @@ -53,7 +56,7 @@ def __add__(self, other): raise TypeError("Cannot add different GP types") mean_total = self.mean_func + other.mean_func cov_total = self.cov_func + other.cov_func - return self.__class__(mean_total, cov_total) + return self.__class__(mean_func=mean_total, cov_func=cov_total) def prior(self, name, X, *args, **kwargs): raise NotImplementedError @@ -64,7 +67,7 @@ def marginal_likelihood(self, name, X, *args, **kwargs): def conditional(self, name, Xnew, *args, **kwargs): raise NotImplementedError - def predict(self, Xnew, point=None, given=None, diag=False): + def predict(self, Xnew, point=None, given=None, diag=False, model=None): raise NotImplementedError @@ -132,7 +135,6 @@ def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kw cov = stabilize(self.cov_func(X), jitter) if reparameterize: size = infer_size(X, kwargs.pop("size", None)) - print("_build_prior:size", size) v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=size, **kwargs) f = pm.Deterministic(name, mu + cholesky(cov).dot(v)) else: @@ -198,7 +200,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter): cov = Kss - at.dot(at.transpose(A), A) return mu, cov - def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): + def conditional(self, name, Xnew, given=None, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -225,7 +227,8 @@ def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): models in PyMC for more information. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. 
+ covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -311,11 +314,11 @@ def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): self.f = f return f - def _build_conditional(self, Xnew, X, f): + def _build_conditional(self, Xnew, X, f, jitter): Kxx = self.cov_func(X) Kxs = self.cov_func(X, Xnew) Kss = self.cov_func(Xnew) - L = cholesky(stabilize(Kxx)) + L = cholesky(stabilize(Kxx, jitter)) A = solve_lower(L, Kxs) cov = Kss - at.dot(at.transpose(A), A) v = solve_lower(L, f - self.mean_func(X)) @@ -325,7 +328,7 @@ def _build_conditional(self, Xnew, X, f): covT = (self.nu + beta - 2) / (nu2 - 2) * cov return nu2, mu, covT - def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): + def conditional(self, name, Xnew, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -342,7 +345,8 @@ def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): Function input values. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -400,14 +404,14 @@ class Marginal(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def _build_marginal_likelihood(self, X, noise): + def _build_marginal_likelihood(self, X, noise, jitter): mu = self.mean_func(X) Kxx = self.cov_func(X) Knx = noise(X) cov = Kxx + Knx - return mu, cov + return mu, stabilize(cov, jitter) - def marginal_likelihood(self, name, X, y, noise, jitter=JITTER_DEFAULT, is_observed=True, **kwargs): + def marginal_likelihood(self, name, X, y, noise, jitter=0.0, is_observed=True, **kwargs): R""" Returns the marginal likelihood distribution, given the input locations `X` and the data `y`. @@ -433,7 +437,7 @@ def marginal_likelihood(self, name, X, y, noise, jitter=JITTER_DEFAULT, is_obser non-white noise. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -495,7 +499,7 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mea cov += noise(Xnew) return mu, cov if pred_noise else stabilize(cov, jitter) - def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -525,7 +529,8 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DE models in PyMC for more information. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
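For context, a minimal usage sketch of the `jitter` keyword described above (illustrative data, lengthscale, and noise level; assuming the post-patch `Marginal` API — this snippet is not part of the diff):

import numpy as np
import pymc as pm

X = np.linspace(0, 10, 50)[:, None]      # training inputs
y = np.random.randn(50)                  # toy observations
Xnew = np.linspace(0, 12, 20)[:, None]   # prediction inputs

with pm.Model() as model:
    cov = pm.gp.cov.ExpQuad(1, ls=0.2)
    gp = pm.gp.Marginal(cov_func=cov)
    # `jitter` on marginal_likelihood also defaults to 0.0 after this change
    f = gp.marginal_likelihood("f", X=X, y=y, noise=0.5)
    # conditionals default to jitter=0.0; pass a small value explicitly
    # if the predictive covariance needs extra numerical stabilization
    fcond = gp.conditional("fcond", Xnew=Xnew, jitter=1e-8)
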
@@ -535,7 +540,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DE mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): + def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=0.0, model=None): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -558,15 +563,16 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, ji Same as `conditional` method. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. """ if given is None: given = {} - + model = modelcontext(model) mu, cov = self._predict_at(Xnew, diag, pred_noise, given, jitter) - return replace_with_values([mu, cov], replacements=point) + return replace_with_values(model, [mu, cov], replacements=point) - def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): + def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=0.0): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -586,7 +592,7 @@ def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=JIT Same as `conditional` method. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, diag, *givens) + mu, cov = self._build_conditional(Xnew, pred_noise, diag, *givens, jitter) return mu, cov @@ -702,7 +708,7 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter): quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) - def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitter=JITTER_DEFAULT, **kwargs): + def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitter=0.0, **kwargs): R""" Returns the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise @@ -727,7 +733,7 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitt Default is `True`. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -829,7 +835,7 @@ def _get_given_vals(self, given): X, Xu, y, sigma = self.X, self.Xu, self.y, self.sigma return X, Xu, y, sigma, cov_total, mean_total - def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kwargs): R""" Returns the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. @@ -850,7 +856,8 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DE models in PyMC for more information. 
jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -859,7 +866,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DE givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - + @conditioned_vars(["Xs", "f"]) class LatentKron(Base): @@ -980,7 +987,7 @@ def _build_conditional(self, Xnew, jitter): cov = stabilize(Kss - at.dot(at.transpose(A), A), jitter) return mu, cov - def conditional(self, name, Xnew, jitter, **kwargs): + def conditional(self, name, Xnew, jitter=0.0, **kwargs): """ Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -1009,7 +1016,8 @@ def conditional(self, name, Xnew, jitter, **kwargs): vector with shape `(n, 1)`. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -1224,7 +1232,7 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): size = infer_size(Xnew, kwargs.pop("size", None)) return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False): + def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -1244,8 +1252,9 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False): Whether or not observation noise is included in the conditional. Default is `False`. """ + model = modelcontext(model) mu, cov = self._predict_at(Xnew, pred_noise, diag) - return replace_with_values([mu, cov], replacements=point) + return replace_with_values(model, [mu, cov], replacements=point) def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" From a4a4f99b530f71eac7cddaab54c8dd443c5179a8 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 00:25:31 -0700 Subject: [PATCH 14/32] fix more tests - use model.logp instead of variable.logp - set req kwargs cov_func and mean_func - fix weirdly small scale on some input X, y - move predict calls into model block - the two kron models outstanding --- pymc/tests/test_gp.py | 81 ++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 297ab35df0c..abaac22b604 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -54,7 +54,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: linear_mean = pm.gp.mean.Linear(2, 0.5) - M = linear_mean.eval() + M = linear_mean(X).eval() npt.assert_allclose(M[1], 0.7222, atol=1e-3) assert M.shape == (10,) @@ -767,17 +767,16 @@ def test_raises3(self): B = pm.gp.cov.Coregion(1) -# @pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestMarginalVsLatent: R""" Compare the logp of models Marginal, noise=0 and Latent. 
""" def setup_method(self): - X = np.random.randn(50, 3) - y = np.random.randn(50) * 0.01 - Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60) * 0.01 + X = np.random.randn(20, 3) + y = np.random.randn(20) + Xnew = np.random.randn(30, 3) + pnew = np.random.randn(30) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) @@ -813,7 +812,6 @@ def testLatent2(self): npt.assert_allclose(latent_logp, self.logp, atol=5) -@pytest.mark.xfail(reason="The `gp.predict` method was not yet refactored") class TestMarginalVsMarginalSparse: R""" Compare logp of models Marginal and MarginalSparse. @@ -822,9 +820,9 @@ class TestMarginalVsMarginalSparse: def setup_method(self): X = np.random.randn(50, 3) - y = np.random.randn(50) * 0.01 + y = np.random.randn(50) Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60) * 0.01 + pnew = np.random.randn(60) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) @@ -858,8 +856,8 @@ def testPredictVar(self, approx): mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) - mu1, var1 = self.gp.predict(self.Xnew, diag=True) - mu2, var2 = gp.predict(self.Xnew, diag=True) + mu1, var1 = self.gp.predict(self.Xnew, diag=True) + mu2, var2 = gp.predict(self.Xnew, diag=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) npt.assert_allclose(var1, var2, atol=0, rtol=1e-3) @@ -869,8 +867,8 @@ def testPredictCov(self): mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx="DTC") f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) - mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) - mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) + mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) + mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) npt.assert_allclose(cov1, cov2, atol=0, rtol=1e-3) @@ -878,7 +876,7 @@ def testPredictCov(self): class TestGPAdditive: def setup_method(self): self.X = np.random.randn(50, 3) - self.y = np.random.randn(50) * 0.01 + self.y = np.random.randn(50) self.Xnew = np.random.randn(60, 3) self.noise = pm.gp.cov.WhiteNoise(0.1) self.covs = ( @@ -888,19 +886,18 @@ def setup_method(self): ) self.means = (pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5)) - # @pytest.mark.xfail(reason="MvNormal was not yet refactored") def testAdditiveMarginal(self): with pm.Model() as model1: - gp1 = pm.gp.Marginal(self.means[0], self.covs[0]) - gp2 = pm.gp.Marginal(self.means[1], self.covs[1]) - gp3 = pm.gp.Marginal(self.means[2], self.covs[2]) + gp1 = pm.gp.Marginal(mean_func=self.means[0], cov_func=self.covs[0]) + gp2 = pm.gp.Marginal(mean_func=self.means[1], cov_func=self.covs[1]) + gp3 = pm.gp.Marginal(mean_func=self.means[2], cov_func=self.covs[2]) gpsum = gp1 + gp2 + gp3 fsum = gpsum.marginal_likelihood("f", self.X, self.y, noise=self.noise) model1_logp = model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.Marginal(reduce(add, self.means), reduce(add, self.covs)) + gptot = pm.gp.Marginal(mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs)) fsum = gptot.marginal_likelihood("f", self.X, self.y, noise=self.noise) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, 
rtol=1e-2) @@ -913,17 +910,18 @@ def testAdditiveMarginal(self): fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2) + logp1 = model1.logp({"fp1": fp}) + logp2 = model2.logp({"fp2": fp}) + npt.assert_allclose(logp1, logp2, atol=0, rtol=1e-2) - @pytest.mark.xfail(reason="MarginalSparse must be refactored to not use DensityDist") @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"]) def testAdditiveMarginalSparse(self, approx): Xu = np.random.randn(10, 3) sigma = 0.1 with pm.Model() as model1: - gp1 = pm.gp.MarginalSparse(self.means[0], self.covs[0], approx=approx) - gp2 = pm.gp.MarginalSparse(self.means[1], self.covs[1], approx=approx) - gp3 = pm.gp.MarginalSparse(self.means[2], self.covs[2], approx=approx) + gp1 = pm.gp.MarginalSparse(mean_func=self.means[0], cov_func=self.covs[0], approx=approx) + gp2 = pm.gp.MarginalSparse(mean_func=self.means[1], cov_func=self.covs[1], approx=approx) + gp3 = pm.gp.MarginalSparse(mean_func=self.means[2], cov_func=self.covs[2], approx=approx) gpsum = gp1 + gp2 + gp3 fsum = gpsum.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) @@ -946,24 +944,23 @@ def testAdditiveMarginalSparse(self, approx): with model2: fp2 = gptot.conditional("fp2", self.Xnew) - fp = np.random.randn(self.Xnew.shape[0], 1) + fp = np.random.randn(self.Xnew.shape[0]) npt.assert_allclose( pm.logp(fp1, fp1).eval({fp1: fp}), pm.logp(fp2, fp2).eval({fp2: fp}), atol=0, rtol=1e-2 ) - @pytest.mark.xfail(reason="MvNormal was not yet refactored") def testAdditiveLatent(self): with pm.Model() as model1: - gp1 = pm.gp.Latent(self.means[0], self.covs[0]) - gp2 = pm.gp.Latent(self.means[1], self.covs[1]) - gp3 = pm.gp.Latent(self.means[2], self.covs[2]) + gp1 = pm.gp.Latent(mean_func=self.means[0], cov_func=self.covs[0]) + gp2 = pm.gp.Latent(mean_func=self.means[1], cov_func=self.covs[1]) + gp3 = pm.gp.Latent(mean_func=self.means[2], cov_func=self.covs[2]) gpsum = gp1 + gp2 + gp3 fsum = gpsum.prior("fsum", self.X, reparameterize=False) model1_logp = model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.Latent(reduce(add, self.means), reduce(add, self.covs)) + gptot = pm.gp.Latent(mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs)) fsum = gptot.prior("fsum", self.X, reparameterize=False) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) @@ -974,13 +971,10 @@ def testAdditiveLatent(self): fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose( - fp1.logp({"fsum": self.y, "fp1": fp}), - fp2.logp({"fsum": self.y, "fp2": fp}), - atol=0, - rtol=1e-2, - ) - + logp1 = model1.logp({"fsum": self.y, "fp1": fp}) + logp2 = model2.logp({"fsum": self.y, "fp2": fp}) + npt.assert_allclose(logp1, logp2, atol=0, rtol=1e-2) + def testAdditiveSparseRaises(self): # cant add different approximations with pm.Model() as model: @@ -1007,7 +1001,6 @@ def testAdditiveTypeRaises2(self): gp1 + gp2 -# @pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestTP: R""" Compare TP with high degress of freedom to GP @@ -1018,17 +1011,18 @@ def setup_method(self): y = np.random.randn(20) Xnew = np.random.randn(30, 3) pnew = np.random.randn(30) - with pm.Model() as model: + + with pm.Model() as model1: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = pm.gp.Latent(cov_func=cov_func) f = gp.prior("f", X, reparameterize=False) p = gp.conditional("p", 
Xnew) + self.gp_latent_logp = model1.logp({"f": y, "p": pnew}) self.X = X self.y = y self.Xnew = Xnew self.pnew = pnew - self.nu = 1000 - self.gp_latent_logp = model.logp({"f": y, "p": pnew}) + self.nu = 10000 def testTPvsLatent(self): with pm.Model() as model: @@ -1047,7 +1041,6 @@ def testTPvsLatentReparameterized(self): p = tp.conditional("p", self.Xnew) chol = np.linalg.cholesky(cov_func(self.X).eval()) f_rotated = np.linalg.solve(chol, self.y) - tp_logp = model.logp({"f_rotated_": f_rotated, "p": self.pnew}) npt.assert_allclose(self.gp_latent_logp, tp_logp, atol=0, rtol=1e-2) @@ -1077,7 +1070,7 @@ def setup_method(self): self.y = np.random.randn(self.N) * 0.1 self.Xnews = (np.random.randn(5, 1), np.random.randn(5, 1), np.random.randn(5, 1)) self.Xnew = np.concatenate(self.Xnews, axis=1) - self.pnew = np.random.randn(len(self.Xnew)) * 0.01 + self.pnew = np.random.randn(len(self.Xnew)) ls = 0.2 with pm.Model() as latent_model: self.cov_funcs = ( @@ -1134,7 +1127,7 @@ def setup_method(self): self.Xnews = (np.random.randn(5, 1), np.random.randn(5, 1), np.random.randn(5, 1)) self.Xnew = np.concatenate(self.Xnews, axis=1) self.sigma = 0.2 - self.pnew = np.random.randn(len(self.Xnew)) * 0.01 + self.pnew = np.random.randn(len(self.Xnew)) ls = 0.2 with pm.Model() as model: self.cov_funcs = [ From 9b26ca18eeb6bc932f1e001fe4a4929371c685ec Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 00:50:25 -0700 Subject: [PATCH 15/32] black stuff --- pymc/tests/test_gp.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index abaac22b604..5e4c0febf43 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -897,7 +897,9 @@ def testAdditiveMarginal(self): model1_logp = model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.Marginal(mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs)) + gptot = pm.gp.Marginal( + mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs) + ) fsum = gptot.marginal_likelihood("f", self.X, self.y, noise=self.noise) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) @@ -919,9 +921,15 @@ def testAdditiveMarginalSparse(self, approx): Xu = np.random.randn(10, 3) sigma = 0.1 with pm.Model() as model1: - gp1 = pm.gp.MarginalSparse(mean_func=self.means[0], cov_func=self.covs[0], approx=approx) - gp2 = pm.gp.MarginalSparse(mean_func=self.means[1], cov_func=self.covs[1], approx=approx) - gp3 = pm.gp.MarginalSparse(mean_func=self.means[2], cov_func=self.covs[2], approx=approx) + gp1 = pm.gp.MarginalSparse( + mean_func=self.means[0], cov_func=self.covs[0], approx=approx + ) + gp2 = pm.gp.MarginalSparse( + mean_func=self.means[1], cov_func=self.covs[1], approx=approx + ) + gp3 = pm.gp.MarginalSparse( + mean_func=self.means[2], cov_func=self.covs[2], approx=approx + ) gpsum = gp1 + gp2 + gp3 fsum = gpsum.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) @@ -974,7 +982,7 @@ def testAdditiveLatent(self): logp1 = model1.logp({"fsum": self.y, "fp1": fp}) logp2 = model2.logp({"fsum": self.y, "fp2": fp}) npt.assert_allclose(logp1, logp2, atol=0, rtol=1e-2) - + def testAdditiveSparseRaises(self): # cant add different approximations with pm.Model() as model: @@ -1011,7 +1019,7 @@ def setup_method(self): y = np.random.randn(20) Xnew = np.random.randn(30, 3) pnew = np.random.randn(30) - + with pm.Model() as model1: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = 
pm.gp.Latent(cov_func=cov_func) From 76b6c16586313cc8267f5f4f5b476d51aa5fa4a2 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 12:51:38 -0700 Subject: [PATCH 16/32] small fixes to get kronlatent and kronmarginal to pass --- pymc/gp/gp.py | 61 +++++++++++++++++++++++-------------------- pymc/gp/util.py | 12 +++++---- pymc/tests/test_gp.py | 22 ++++++++++------ 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 9e2a8d318a9..a0f2beaa281 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -21,9 +21,11 @@ import pymc as pm +from pymc.distributions.distribution import NoDistribution from pymc.gp.cov import Constant, Covariance from pymc.gp.mean import Zero from pymc.gp.util import ( + JITTER_DEFAULT, cholesky, conditioned_vars, infer_size, @@ -33,11 +35,8 @@ stabilize, ) from pymc.math import cartesian, kron_diag, kron_dot, kron_solve_lower, kron_solve_upper -from pymc.distributions.distribution import NoDistribution from pymc.model import modelcontext -JITTER_DEFAULT = 1e-6 - __all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] @@ -163,7 +162,7 @@ def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): Reparameterize the distribution by rotating the random variable by the Cholesky factor of the covariance matrix. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to distribution constructor. @@ -226,7 +225,7 @@ def conditional(self, name, Xnew, given=None, jitter=0.0, **kwargs): and `gp`. See the section in the documentation on additive GP models in PyMC for more information. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. **kwargs @@ -344,7 +343,7 @@ def conditional(self, name, Xnew, jitter=0.0, **kwargs): Xnew: array-like Function input values. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. **kwargs @@ -436,7 +435,7 @@ def marginal_likelihood(self, name, X, y, noise, jitter=0.0, is_observed=True, * Standard deviation of the Gaussian noise. Can also be a Covariance for non-white noise. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution @@ -477,7 +476,9 @@ def _get_given_vals(self, given): X, y, noise = self.X, self.y, self.noise return X, y, noise, cov_total, mean_total - def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total, jitter): + def _build_conditional( + self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total, jitter + ): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) Knx = noise(X) @@ -528,7 +529,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kw and `gp`. 
See the section in the documentation on additive GP models in PyMC for more information. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. **kwargs @@ -540,7 +541,9 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kw mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=0.0, model=None): + def predict( + self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=0.0, model=None + ): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -562,7 +565,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, ji given: dict Same as `conditional` method. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. """ @@ -708,7 +711,9 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter): quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) - def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitter=0.0, **kwargs): + def marginal_likelihood( + self, name, X, Xu, y, noise=None, is_observed=True, jitter=0.0, **kwargs + ): R""" Returns the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise @@ -732,7 +737,7 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitt Whether to set `y` as an `observed` variable in the `model`. Default is `True`. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution @@ -787,7 +792,9 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitt **kwargs, ) - def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total, jitter): + def _build_conditional( + self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total, jitter + ): sigma2 = at.square(sigma) Kuu = cov_total(Xu) Kuf = cov_total(Xu, X) @@ -855,7 +862,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kw and `gp`. See the section in the documentation on additive GP models in PyMC for more information. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. 
**kwargs @@ -866,7 +873,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kw givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - + @conditioned_vars(["Xs", "f"]) class LatentKron(Base): @@ -934,15 +941,14 @@ def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") def _build_prior(self, name, Xs, jitter, **kwargs): - self.N = np.prod([len(X) for X in Xs]) + self.N = int(np.prod([len(X) for X in Xs])) mu = self.mean_func(cartesian(*Xs)) - chols = [cholesky(stabilize(cov(X, jitter))) for cov, X in zip(self.cov_funcs, Xs)] - # remove reparameterization option + chols = [cholesky(stabilize(cov(X), jitter)) for cov, X in zip(self.cov_funcs, Xs)] v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=self.N, **kwargs) f = pm.Deterministic(name, mu + at.flatten(kron_dot(chols, v))) return f - def prior(self, name, Xs, jitter, **kwargs): + def prior(self, name, Xs, jitter=JITTER_DEFAULT, **kwargs): """ Returns the prior distribution evaluated over the input locations `Xs`. @@ -957,7 +963,7 @@ def prior(self, name, Xs, jitter, **kwargs): total covariance function is measured on the full grid `cartesian(*Xs)`. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to the `KroneckerNormal` @@ -965,6 +971,7 @@ def prior(self, name, Xs, jitter, **kwargs): """ if len(Xs) != len(self.cov_funcs): raise ValueError("Must provide a covariance function for each X") + f = self._build_prior(name, Xs, jitter, **kwargs) self.Xs = Xs self.f = f @@ -1015,7 +1022,7 @@ def conditional(self, name, Xnew, jitter=0.0, **kwargs): Function input values. If one-dimensional, must be a column vector with shape `(n, 1)`. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. **kwargs @@ -1023,8 +1030,7 @@ def conditional(self, name, Xnew, jitter=0.0, **kwargs): constructor. """ mu, cov = self._build_conditional(Xnew, jitter) - size = infer_size(Xnew, kwargs.pop("size", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @conditioned_vars(["Xs", "y", "sigma"]) @@ -1103,7 +1109,7 @@ def _build_marginal_likelihood(self, Xs): return mu, covs def _check_inputs(self, Xs, y): - N = np.prod([len(X) for X in Xs]) + N = int(np.prod([len(X) for X in Xs])) if len(Xs) != len(self.cov_funcs): raise ValueError("Must provide a covariance function for each X") if N != len(y): @@ -1152,7 +1158,7 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): "unobserved use gp.LatentKron instead.", DeprecationWarning, ) - size = np.prod([len(X) for X in Xs]) + size = int(np.prod([len(X) for X in Xs])) return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) def _build_conditional(self, Xnew, pred_noise, diag): @@ -1229,8 +1235,7 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): constructor. 
""" mu, cov = self._build_conditional(Xnew, pred_noise, False) - size = infer_size(Xnew, kwargs.pop("size", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): R""" diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 39fe62e9bea..5b1efb7a695 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -35,15 +35,17 @@ from pymc.aesaraf import compile_rv_inplace, walk_model +JITTER_DEFAULT = 1e-6 + # TODO: add test def replace_with_values(model, vars_needed, replacements=None): R""" Replace random variable nodes in the graph with values given by the replacements dict. Uses untransformed versions of the inputs, performs some basic input validation. - + NOTE TO REVIEWER: Modified this from `sample_posterior_predictive`. Is there a better way to do this? - + """ inputs, input_names = [], [] for rv in walk_model(vars_needed, walk_past_rvs=True): @@ -53,8 +55,8 @@ def replace_with_values(model, vars_needed, replacements=None): # then deterministic, no inputs are required, can eval and return if len(inputs) == 0: - return tuple([v.eval() for v in vars_needed]) - + return tuple(v.eval() for v in vars_needed) + fn = compile_rv_inplace( inputs, vars_needed, @@ -95,7 +97,7 @@ def infer_size(X, n_points=None): return n_points -def stabilize(K, jitter=1e-6): +def stabilize(K, jitter=JITTER_DEFAULT): R""" Adds small diagonal to a covariance matrix. diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 5e4c0febf43..232121cbe9f 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -937,7 +937,7 @@ def testAdditiveMarginalSparse(self, approx): with pm.Model() as model2: gptot = pm.gp.MarginalSparse( - reduce(add, self.means), reduce(add, self.covs), approx=approx + mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs), approx=approx ) fsum = gptot.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) model2_logp = model2.logp({"fsum": self.y}) @@ -953,9 +953,11 @@ def testAdditiveMarginalSparse(self, approx): fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose( - pm.logp(fp1, fp1).eval({fp1: fp}), pm.logp(fp2, fp2).eval({fp2: fp}), atol=0, rtol=1e-2 - ) + + model1_logp = model1.logp({"fp1": fp, "fsum": self.y}) + model2_logp = model2.logp({"fp2": fp, "fsum": self.y}) + + npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) def testAdditiveLatent(self): with pm.Model() as model1: @@ -1061,7 +1063,6 @@ def testAdditiveTPRaises(self): gp1 + gp2 -@pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestLatentKron: """ Compare gp.LatentKron to gp.Latent, both with Gaussian noise. @@ -1117,7 +1118,6 @@ def testLatentKronRaisesSizes(self): gp.prior("f", Xs=[np.linspace(0, 1, 7)[:, None], np.linspace(0, 1, 5)[:, None]]) -@pytest.mark.xfail(reason="The `gp.predict` method was not yet refactored") class TestMarginalKron: """ Compare gp.MarginalKron to gp.Marginal. 
@@ -1136,6 +1136,10 @@ def setup_method(self): self.Xnew = np.concatenate(self.Xnews, axis=1) self.sigma = 0.2 self.pnew = np.random.randn(len(self.Xnew)) + + print("pnew", self.pnew) + print("y", self.y) + ls = 0.2 with pm.Model() as model: self.cov_funcs = [ @@ -1154,7 +1158,8 @@ def setup_method(self): def testMarginalKronvsMarginalpredict(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + # f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) mu, cov = kron_gp.predict(self.Xnew) npt.assert_allclose(mu, self.mu, atol=1e-5, rtol=1e-2) @@ -1163,7 +1168,8 @@ def testMarginalKronvsMarginalpredict(self): def testMarginalKronvsMarginal(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + # f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) kron_logp = kron_model.logp({"p": self.pnew}) npt.assert_allclose(kron_logp, self.logp, atol=0, rtol=1e-2) From 90ae450fcd6b73a2d19ba357b9258fc716d710d6 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 13:08:22 -0700 Subject: [PATCH 17/32] remove leftover prints --- pymc/tests/test_gp.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 232121cbe9f..22f22792aae 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -1137,9 +1137,6 @@ def setup_method(self): self.sigma = 0.2 self.pnew = np.random.randn(len(self.Xnew)) - print("pnew", self.pnew) - print("y", self.y) - ls = 0.2 with pm.Model() as model: self.cov_funcs = [ From df1fe573d15370f2a95b18f09c34c038fb230497 Mon Sep 17 00:00:00 2001 From: bwengals Date: Fri, 29 Oct 2021 19:41:13 -0700 Subject: [PATCH 18/32] dep warning -> future warning --- pymc/gp/gp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index a0f2beaa281..f3f72cb0725 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -454,7 +454,7 @@ def marginal_likelihood(self, name, X, y, noise, jitter=0.0, is_observed=True, * warnings.warn( "The 'is_observed' argument has been deprecated. If the GP is " "unobserved use gp.Latent instead.", - DeprecationWarning, + FutureWarning, ) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -776,7 +776,7 @@ def marginal_likelihood( warnings.warn( "The 'is_observed' argument has been deprecated. If the GP is " "unobserved use gp.Latent instead.", - DeprecationWarning, + FutureWarning, ) return pm.DensityDist( name, @@ -1156,7 +1156,7 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): warnings.warn( "The 'is_observed' argument has been deprecated. 
If the GP is " "unobserved use gp.LatentKron instead.", - DeprecationWarning, + FutureWarning, ) size = int(np.prod([len(X) for X in Xs])) return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) From dceabf8a448f4e8e88ef4b6e214cc8c4917cc49b Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Tue, 2 Nov 2021 18:47:53 -0700 Subject: [PATCH 19/32] roll back mkl and mkl-service version --- conda-envs/windows-environment-dev-py38.yml | 2 ++ conda-envs/windows-environment-test-py38.yml | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/conda-envs/windows-environment-dev-py38.yml b/conda-envs/windows-environment-dev-py38.yml index 2f143a72af0..9dda7976d14 100644 --- a/conda-envs/windows-environment-dev-py38.yml +++ b/conda-envs/windows-environment-dev-py38.yml @@ -11,6 +11,8 @@ dependencies: - cloudpickle - fastprogress>=0.2.0 - h5py +- mkl==2020.4 +- mkl-service==2.3.0 - numpy>=1.15.0 - pandas - pip diff --git a/conda-envs/windows-environment-test-py38.yml b/conda-envs/windows-environment-test-py38.yml index 490e402c632..877b849882e 100644 --- a/conda-envs/windows-environment-test-py38.yml +++ b/conda-envs/windows-environment-test-py38.yml @@ -12,7 +12,8 @@ dependencies: - fastprogress>=0.2.0 - h5py>=2.7 - libpython -- mkl-service +- mkl==2020.4 +- mkl-service==2.3.0 - m2w64-toolchain - numpy>=1.15.0 - pandas From 4692407efcef912a5a7bc1c7d5bc976efd2262b2 Mon Sep 17 00:00:00 2001 From: bwengals Date: Tue, 2 Nov 2021 19:26:20 -0700 Subject: [PATCH 20/32] fix precommit --- pymc/gp/gp.py | 1 + pymc/gp/util.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index f3f72cb0725..65c7841cc4a 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -21,6 +21,7 @@ import pymc as pm +# Avoid circular dependency from pymc.distributions.distribution import NoDistribution from pymc.gp.cov import Constant, Covariance from pymc.gp.mean import Zero diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 5b1efb7a695..4dd9d9b7954 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,7 +14,7 @@ import warnings -from typing import Dict, Optional +from typing import Dict import aesara.tensor as at import numpy as np From cc16bd530b123927168d2e83441db8c6400849d3 Mon Sep 17 00:00:00 2001 From: bwengals Date: Tue, 2 Nov 2021 19:28:46 -0700 Subject: [PATCH 21/32] remove old DeprecationWarning --- pymc/gp/gp.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 65c7841cc4a..475e7314059 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -748,18 +748,12 @@ def marginal_likelihood( self.X = X self.Xu = Xu self.y = y + if noise is None: - sigma = kwargs.get("sigma") - if sigma is None: - raise ValueError("noise argument must be specified") - else: - self.sigma = sigma - warnings.warn( - "The 'sigma' argument has been deprecated. 
Use 'noise' instead.", - DeprecationWarning, - ) + raise ValueError("noise argument must be specified") else: self.sigma = noise + if is_observed: return pm.DensityDist( name, From 9763b98fcdb5d748a993e50286c9ebfa2497d41f Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 3 Nov 2021 16:53:11 -0700 Subject: [PATCH 22/32] improve tests cleanup gp.util.replace_with_values and add docstrings --- pymc/gp/cov.py | 2 +- pymc/gp/gp.py | 16 +++--- pymc/gp/util.py | 31 ++++++++--- pymc/tests/test_gp.py | 117 +++++++++++++++++++++++++++++++----------- 4 files changed, 118 insertions(+), 48 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index bbeeb41026f..f5dc1c9a13b 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -152,7 +152,7 @@ def __array_wrap__(self, result): elif isinstance(result[0][0], Prod): return result[0][0].factor_list[0] * A else: - raise RuntimeError + raise TypeError(f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`.") class Combination(Covariance): diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 475e7314059..4f762ecb664 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -1109,9 +1109,7 @@ def _check_inputs(self, Xs, y): raise ValueError("Must provide a covariance function for each X") if N != len(y): raise ValueError( - ( - "Length of y ({}) must match length of cartesian" "cartesian product of Xs ({})" - ).format(len(y), N) + f"Length of y ({len(y)}) must match length of cartesian product of Xs ({N})" ) def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): @@ -1156,7 +1154,7 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): size = int(np.prod([len(X) for X in Xs])) return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) - def _build_conditional(self, Xnew, pred_noise, diag): + def _build_conditional(self, Xnew, diag, pred_noise): Xs, y, sigma = self.Xs, self.y, self.sigma # Old points @@ -1170,7 +1168,9 @@ def _build_conditional(self, Xnew, pred_noise, diag): eigs += sigma ** 2 # New points + print('diag', diag) Km = self.cov_func(Xnew, diag=diag) + print('Km', Km.eval()) Knm = self.cov_func(X, Xnew) Kmn = Knm.T @@ -1195,7 +1195,7 @@ def _build_conditional(self, Xnew, pred_noise, diag): cov += sigma * at.identity_like(cov) return mu, cov - def conditional(self, name, Xnew, pred_noise=False, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs): """ Returns the conditional distribution evaluated over new input locations `Xnew`, just as in `Marginal`. @@ -1229,7 +1229,7 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ - mu, cov = self._build_conditional(Xnew, pred_noise, False) + mu, cov = self._build_conditional(Xnew, diag, pred_noise) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): @@ -1252,8 +1252,8 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): Whether or not observation noise is included in the conditional. Default is `False`. 
""" + mu, cov = self._predict_at(Xnew, diag, pred_noise) model = modelcontext(model) - mu, cov = self._predict_at(Xnew, pred_noise, diag) return replace_with_values(model, [mu, cov], replacements=point) def _predict_at(self, Xnew, diag=False, pred_noise=False): @@ -1273,5 +1273,5 @@ def _predict_at(self, Xnew, diag=False, pred_noise=False): Whether or not observation noise is included in the conditional. Default is `False`. """ - mu, cov = self._build_conditional(Xnew, pred_noise, diag) + mu, cov = self._build_conditional(Xnew, diag, pred_noise) return mu, cov diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 4dd9d9b7954..3414ed741c9 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -43,9 +43,15 @@ def replace_with_values(model, vars_needed, replacements=None): R""" Replace random variable nodes in the graph with values given by the replacements dict. Uses untransformed versions of the inputs, performs some basic input validation. - - NOTE TO REVIEWER: Modified this from `sample_posterior_predictive`. Is there a better way to do this? - + + Parameters + ---------- + model: Model + A PyMC model object + vars_needed: list of TensorVariables + A list of variable outputs + replacements: dict with string keys, numeric values + """ inputs, input_names = [], [] for rv in walk_model(vars_needed, walk_past_rvs=True): @@ -64,11 +70,14 @@ def replace_with_values(model, vars_needed, replacements=None): accept_inplace=True, on_unused_input="ignore", ) - + + # remove unneeded inputs replacements = {name: val for name, val in replacements.items() if name in input_names} - missing = set(vars_needed) - set(replacements.keys()) + missing = set(input_names) - set(replacements.keys()) + + # error if more inputs needed if len(missing) > 0: - missing_str = ", ".join({m.name for m in missing}) + missing_str = ", ".join(missing) raise ValueError(f"Values for {missing_str} must be included in `replacements`.") return fn(**replacements) @@ -115,7 +124,7 @@ def stabilize(K, jitter=JITTER_DEFAULT): return K + jitter * at.identity_like(K) -def kmeans_inducing_points(n_inducing, X): +def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): R""" Use the K-means algorithm to initialize the locations `X` for the inducing points `fu`. @@ -126,6 +135,8 @@ def kmeans_inducing_points(n_inducing, X): The number of inducing points (or k, the number of clusters) X: array-like Gaussian process input matrix. 
+ **kmeans_kwargs: + Extra keyword arguments that are passed to `scipy.cluster.vq.kmeans` """ # first whiten X if isinstance(X, TensorConstant): @@ -143,7 +154,11 @@ def kmeans_inducing_points(n_inducing, X): # if std of a column is very small (zero), don't normalize that column scaling[scaling <= 1e-6] = 1.0 Xw = X / scaling - Xu, distortion = kmeans(Xw, n_inducing) + + if "k_or_guess" in kmeans_kwargs: + warn.UserWarning("Use `n_inducing` to set the `k_or_guess` parameter instead.") + + Xu, distortion = kmeans(Xw, k_or_guess=n_inducing, **kmeans_kwargs) return Xu * scaling diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 22f22792aae..6de5243a7c0 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -688,6 +688,34 @@ def test_raises(self): with pytest.raises(TypeError): pm.gp.cov.ScaledCov(1, "str is not Covariance object", lambda x: x) + +class TestCircular: + def test_1d_tau1(self): + X = np.linspace(0, 1, 10)[:, None] + etalon = 0.600881 + with pm.Model(): + cov = pm.gp.cov.Circular(1, 1, tau=5) + K = cov(X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + K = cov(X, X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + # check diagonal + Kd = cov(X, diag=True).eval() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + + def test_1d_tau2(self): + X = np.linspace(0, 1, 10)[:, None] + etalon = 0.691239 + with pm.Model(): + cov = pm.gp.cov.Circular(1, 1, tau=4) + K = cov(X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + K = cov(X, X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + # check diagonal + Kd = cov(X, diag=True).eval() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + class TestHandleArgs: def test_handleargs(self): @@ -765,7 +793,7 @@ def test_raises3(self): with pm.Model() as model: with pytest.raises(ValueError): B = pm.gp.cov.Coregion(1) - + class TestMarginalVsLatent: R""" @@ -1155,17 +1183,18 @@ def setup_method(self): def testMarginalKronvsMarginalpredict(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - # f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) mu, cov = kron_gp.predict(self.Xnew) npt.assert_allclose(mu, self.mu, atol=1e-5, rtol=1e-2) npt.assert_allclose(cov, self.cov, atol=1e-5, rtol=1e-2) + with kron_model: + _, var = kron_gp.predict(self.Xnew, diag=True) + npt.assert_allclose(np.diag(cov), var, atol=1e-5, rtol=1e-2) def testMarginalKronvsMarginal(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - # f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) kron_logp = kron_model.logp({"p": self.pnew}) @@ -1179,7 +1208,7 @@ def testMarginalKronRaises(self): gp1 + gp2 -class TestUtil: +class TestPlotGP: def test_plot_gp_dist(self): """Test that the plotting helper works with the stated input shapes.""" import matplotlib.pyplot as plt @@ -1206,31 +1235,57 @@ def test_plot_gp_dist_warn_nan(self): pm.gp.util.plot_gp_dist(ax, x=np.linspace(0, 50, X), samples=samples) plt.close() pass + - -class TestCircular: - def test_1d_tau1(self): - X = np.linspace(0, 1, 10)[:, None] - etalon = 0.600881 - with pm.Model(): - cov = pm.gp.cov.Circular(1, 1, tau=5) - K = cov(X).eval() - 
npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = cov(X, X).eval() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - # check diagonal - Kd = cov(X, diag=True).eval() - npt.assert_allclose(np.diag(K), Kd, atol=1e-5) - - def test_1d_tau2(self): - X = np.linspace(0, 1, 10)[:, None] - etalon = 0.691239 - with pm.Model(): - cov = pm.gp.cov.Circular(1, 1, tau=4) - K = cov(X).eval() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = cov(X, X).eval() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - # check diagonal - Kd = cov(X, diag=True).eval() - npt.assert_allclose(np.diag(K), Kd, atol=1e-5) +class TestKmeansInducing: + def setup_method(self): + self.centers = (-5, 5) + self.x = np.concatenate( + ( + self.centers[0] + np.random.randn(500), + self.centers[1] + np.random.randn(500) + ) + ) + + def test_kmeans(self): + X = self.x[:, None] + Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() + npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) + + X = at.as_tensor_variable(self.x[:, None]) + Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() + npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) + + def test_kmeans_raises(self): + with pytest.raises(TypeError): + Xu = pm.gp.util.kmeans_inducing_points(2, "str is the wrong type").flatten() + + + +class TestReplaceWithValues: + def test_basic_replace(self): + with pm.Model() as model: + a = pm.Normal("a") + b = pm.Normal("b", mu=a) + c = a*b + + c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"a": 2, "b": 3, "x": 100}) + assert c_val == np.array(6.0) + + def test_replace_no_inputs_needed(self): + with pm.Model() as model: + a = at.as_tensor_variable(2.0) + b = 1.0 + a + c = a*b + + c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"x": 100}) + assert c_val == np.array(6.0) + + def test_missing_input(self): + with pm.Model() as model: + a = pm.Normal("a") + b = pm.Normal("b", mu=a) + c = a*b + + with pytest.raises(ValueError): + c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"a": 2, "x": 100}) \ No newline at end of file From 23fc5c7bb492eee3756f2710d6461c610b27da66 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 3 Nov 2021 17:08:19 -0700 Subject: [PATCH 23/32] fix pre-commit issue --- pymc/gp/gp.py | 14 +++---------- pymc/gp/util.py | 31 ++++++++++++++++------------ pymc/tests/test_gp.py | 47 +++++++++++++++++++++---------------------- 3 files changed, 44 insertions(+), 48 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 4f762ecb664..b8a76519598 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -21,8 +21,6 @@ import pymc as pm -# Avoid circular dependency -from pymc.distributions.distribution import NoDistribution from pymc.gp.cov import Constant, Covariance from pymc.gp.mean import Zero from pymc.gp.util import ( @@ -36,7 +34,6 @@ stabilize, ) from pymc.math import cartesian, kron_diag, kron_dot, kron_solve_lower, kron_solve_upper -from pymc.model import modelcontext __all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] @@ -572,9 +569,8 @@ def predict( """ if given is None: given = {} - model = modelcontext(model) mu, cov = self._predict_at(Xnew, diag, pred_noise, given, jitter) - return replace_with_values(model, [mu, cov], replacements=point) + return replace_with_values([mu, cov], replacements=point, model=model) def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=0.0): R""" @@ -1109,7 +1105,7 @@ def _check_inputs(self, Xs, y): raise ValueError("Must provide a 
covariance function for each X") if N != len(y): raise ValueError( - f"Length of y ({len(y)}) must match length of cartesian product of Xs ({N})" + f"Length of y ({len(y)}) must match length of cartesian product of Xs ({N})" ) def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): @@ -1167,10 +1163,7 @@ def _build_conditional(self, Xnew, diag, pred_noise): if sigma is not None: eigs += sigma ** 2 - # New points - print('diag', diag) Km = self.cov_func(Xnew, diag=diag) - print('Km', Km.eval()) Knm = self.cov_func(X, Xnew) Kmn = Knm.T @@ -1253,8 +1246,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): Default is `False`. """ mu, cov = self._predict_at(Xnew, diag, pred_noise) - model = modelcontext(model) - return replace_with_values(model, [mu, cov], replacements=point) + return replace_with_values([mu, cov], replacements=point, model=model) def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 3414ed741c9..5ffd36b8318 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,8 +14,6 @@ import warnings -from typing import Dict - import aesara.tensor as at import numpy as np @@ -35,24 +33,31 @@ from pymc.aesaraf import compile_rv_inplace, walk_model +# Avoid circular dependency when importing modelcontext +from pymc.distributions.distribution import NoDistribution + +assert NoDistribution # keep both pylint and black happy +from pymc.model import modelcontext + JITTER_DEFAULT = 1e-6 -# TODO: add test -def replace_with_values(model, vars_needed, replacements=None): +def replace_with_values(vars_needed, replacements=None, model=None): R""" Replace random variable nodes in the graph with values given by the replacements dict. Uses untransformed versions of the inputs, performs some basic input validation. - + Parameters ---------- - model: Model - A PyMC model object vars_needed: list of TensorVariables A list of variable outputs replacements: dict with string keys, numeric values - + The variable name and values to be replaced in the model graph. 
+ model: Model + A PyMC model object """ + model = modelcontext(model) + inputs, input_names = [], [] for rv in walk_model(vars_needed, walk_past_rvs=True): if rv in model.named_vars.values() and not isinstance(rv, SharedVariable): @@ -70,11 +75,11 @@ def replace_with_values(model, vars_needed, replacements=None): accept_inplace=True, on_unused_input="ignore", ) - - # remove unneeded inputs + + # remove unneeded inputs replacements = {name: val for name, val in replacements.items() if name in input_names} missing = set(input_names) - set(replacements.keys()) - + # error if more inputs needed if len(missing) > 0: missing_str = ", ".join(missing) @@ -154,10 +159,10 @@ def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): # if std of a column is very small (zero), don't normalize that column scaling[scaling <= 1e-6] = 1.0 Xw = X / scaling - + if "k_or_guess" in kmeans_kwargs: warn.UserWarning("Use `n_inducing` to set the `k_or_guess` parameter instead.") - + Xu, distortion = kmeans(Xw, k_or_guess=n_inducing, **kmeans_kwargs) return Xu * scaling diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 6de5243a7c0..48ca02bd306 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -688,7 +688,7 @@ def test_raises(self): with pytest.raises(TypeError): pm.gp.cov.ScaledCov(1, "str is not Covariance object", lambda x: x) - + class TestCircular: def test_1d_tau1(self): X = np.linspace(0, 1, 10)[:, None] @@ -793,7 +793,7 @@ def test_raises3(self): with pm.Model() as model: with pytest.raises(ValueError): B = pm.gp.cov.Coregion(1) - + class TestMarginalVsLatent: R""" @@ -1235,57 +1235,56 @@ def test_plot_gp_dist_warn_nan(self): pm.gp.util.plot_gp_dist(ax, x=np.linspace(0, 50, X), samples=samples) plt.close() pass - + class TestKmeansInducing: def setup_method(self): self.centers = (-5, 5) self.x = np.concatenate( - ( - self.centers[0] + np.random.randn(500), - self.centers[1] + np.random.randn(500) - ) + (self.centers[0] + np.random.randn(500), self.centers[1] + np.random.randn(500)) ) - + def test_kmeans(self): X = self.x[:, None] Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) - + X = at.as_tensor_variable(self.x[:, None]) Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) - + def test_kmeans_raises(self): with pytest.raises(TypeError): Xu = pm.gp.util.kmeans_inducing_points(2, "str is the wrong type").flatten() - - - + + class TestReplaceWithValues: def test_basic_replace(self): with pm.Model() as model: a = pm.Normal("a") b = pm.Normal("b", mu=a) - c = a*b - - c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"a": 2, "b": 3, "x": 100}) + c = a * b + + (c_val,) = pm.gp.util.replace_with_values( + [c], replacements={"a": 2, "b": 3, "x": 100}, model=model + ) assert c_val == np.array(6.0) - + def test_replace_no_inputs_needed(self): with pm.Model() as model: a = at.as_tensor_variable(2.0) b = 1.0 + a - c = a*b - - c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"x": 100}) + c = a * b + (c_val,) = pm.gp.util.replace_with_values([c], replacements={"x": 100}) assert c_val == np.array(6.0) - + def test_missing_input(self): with pm.Model() as model: a = pm.Normal("a") b = pm.Normal("b", mu=a) - c = a*b - + c = a * b + with pytest.raises(ValueError): - c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"a": 2, "x": 100}) \ No newline at end of file + (c_val,) = 
pm.gp.util.replace_with_values( + [c], replacements={"a": 2, "x": 100}, model=model + ) From 158529f51743f20385ae618d050f6c2b75ce0e2a Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 3 Nov 2021 17:14:12 -0700 Subject: [PATCH 24/32] fix precommit on cov.py --- pymc/gp/cov.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index f5dc1c9a13b..1a19edb2bbf 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -152,7 +152,9 @@ def __array_wrap__(self, result): elif isinstance(result[0][0], Prod): return result[0][0].factor_list[0] * A else: - raise TypeError(f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`.") + raise TypeError( + f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`." + ) class Combination(Covariance): From 08a7652dac3a6886fba730b7160bd13722f485b8 Mon Sep 17 00:00:00 2001 From: bwengals Date: Sun, 7 Nov 2021 17:07:30 -0800 Subject: [PATCH 25/32] fix comment --- pymc/gp/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 5ffd36b8318..730e664eec4 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -64,7 +64,7 @@ def replace_with_values(vars_needed, replacements=None, model=None): inputs.append(rv) input_names.append(rv.name) - # then deterministic, no inputs are required, can eval and return + # Then it's deterministic, no inputs are required, can eval and return if len(inputs) == 0: return tuple(v.eval() for v in vars_needed) @@ -76,11 +76,11 @@ def replace_with_values(vars_needed, replacements=None, model=None): on_unused_input="ignore", ) - # remove unneeded inputs + # Remove unneeded inputs replacements = {name: val for name, val in replacements.items() if name in input_names} missing = set(input_names) - set(replacements.keys()) - # error if more inputs needed + # Error if more inputs are needed if len(missing) > 0: missing_str = ", ".join(missing) raise ValueError(f"Values for {missing_str} must be included in `replacements`.") From 196e7029e57f278b1d3b088446f1aecf813767e2 Mon Sep 17 00:00:00 2001 From: bwengals Date: Sun, 7 Nov 2021 17:43:33 -0800 Subject: [PATCH 26/32] dont force blas version in windows dev enviornment (roll back changes) --- conda-envs/windows-environment-dev-py38.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/conda-envs/windows-environment-dev-py38.yml b/conda-envs/windows-environment-dev-py38.yml index 9dda7976d14..2f143a72af0 100644 --- a/conda-envs/windows-environment-dev-py38.yml +++ b/conda-envs/windows-environment-dev-py38.yml @@ -11,8 +11,6 @@ dependencies: - cloudpickle - fastprogress>=0.2.0 - h5py -- mkl==2020.4 -- mkl-service==2.3.0 - numpy>=1.15.0 - pandas - pip From d02e32065f8be770a58f9299fe7cb40eb48bb414 Mon Sep 17 00:00:00 2001 From: bwengals Date: Mon, 8 Nov 2021 17:22:17 -0800 Subject: [PATCH 27/32] update release notes --- RELEASE-NOTES.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 6d58c0adca1..8555362ca83 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -45,8 +45,14 @@ All of the above apply to: - Changes to the BART implementation: - A BART variable can be combined with other random variables. The `inv_link` argument has been removed (see [4914](https://github.com/pymc-devs/pymc3/pull/4914)). - Moved BART to its own module (see [5058](https://github.com/pymc-devs/pymc3/pull/5058)). -- ... 
- +- Changes to the Gaussian Process (GP) submodule: + - For all implementations, `gp.Latent`, `gp.Marginal` etc., `cov_func` and `mean_func` are required kwargs. + - In Windows test conda environment the `mkl` version is fixed to verison 2020.4, and `mkl-service` is fixed to `2.3.0`. This was required for `gp.MarginalKron` to function properly. + - `gp.MvStudentT` uses rotated samples from `StudentT` directly now, instead of sampling from `pm.Chi2` and then from `pm.Normal`. + - The "jitter" parameter, or the diagonal noise term added to Gram matrices such that the Cholesky is numerically stable, is now exposed to the user instead of hard-coded. See the function `gp.util.stabilize`. + - The `is_observed` arguement for `gp.Marginal*` implementations has been deprecated. + - In the gp.utils file, the `kmeans_inducing_points` function now passes through `kmeans_kwargs` to scipy's k-means function. + - The function `replace_with_values` function has been added to `gp.utils`. ### Expected breaks + New API was already available in `v3`. From 8fee214a1e8ba51cc0109b767b8ee9aee8e42f79 Mon Sep 17 00:00:00 2001 From: bwengals Date: Mon, 8 Nov 2021 17:23:03 -0800 Subject: [PATCH 28/32] add removed ... line from release notes --- RELEASE-NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 8555362ca83..35c4f64e73a 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -53,6 +53,7 @@ All of the above apply to: - The `is_observed` arguement for `gp.Marginal*` implementations has been deprecated. - In the gp.utils file, the `kmeans_inducing_points` function now passes through `kmeans_kwargs` to scipy's k-means function. - The function `replace_with_values` function has been added to `gp.utils`. + - ... ### Expected breaks + New API was already available in `v3`. From d5474a7d7cb96a363a4f4cc375a8f98bd3ebb743 Mon Sep 17 00:00:00 2001 From: bwengals Date: Fri, 12 Nov 2021 16:35:28 -0800 Subject: [PATCH 29/32] add link to PR --- RELEASE-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 35c4f64e73a..ac58184d6a6 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -45,7 +45,7 @@ All of the above apply to: - Changes to the BART implementation: - A BART variable can be combined with other random variables. The `inv_link` argument has been removed (see [4914](https://github.com/pymc-devs/pymc3/pull/4914)). - Moved BART to its own module (see [5058](https://github.com/pymc-devs/pymc3/pull/5058)). -- Changes to the Gaussian Process (GP) submodule: +- Changes to the Gaussian Process (GP) submodule (see [5055](https://github.com/pymc-devs/pymc/pull/5055)): - For all implementations, `gp.Latent`, `gp.Marginal` etc., `cov_func` and `mean_func` are required kwargs. - In Windows test conda environment the `mkl` version is fixed to verison 2020.4, and `mkl-service` is fixed to `2.3.0`. This was required for `gp.MarginalKron` to function properly. - `gp.MvStudentT` uses rotated samples from `StudentT` directly now, instead of sampling from `pm.Chi2` and then from `pm.Normal`. 
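As a quick illustration of the first bullet above (an editor's sketch rather than code taken from this patch series; the kernel choice, lengthscale prior, and data are placeholders), the GP constructors now take `mean_func` and `cov_func` by keyword only:

import numpy as np
import pymc as pm

X = np.linspace(0, 10, 50)[:, None]
y = np.sin(X.flatten()) + 0.1 * np.random.randn(50)

with pm.Model() as model:
    ell = pm.Gamma("ell", alpha=2, beta=1)
    cov_func = pm.gp.cov.ExpQuad(1, ls=ell)
    mean_func = pm.gp.mean.Constant(0.0)

    # positional arguments, e.g. pm.gp.Marginal(mean_func, cov_func), now raise a TypeError
    gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func)
    f = gp.marginal_likelihood("f", X=X, y=y, noise=0.1)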
From 53485759aab677ea112801bde8eb0251a7d9b8ba Mon Sep 17 00:00:00 2001 From: bwengals Date: Sat, 20 Nov 2021 14:16:47 -0800 Subject: [PATCH 30/32] remove is_observed usage from TestMarginalVsLatent --- pymc/tests/test_gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 48ca02bd306..a4dae696848 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -809,7 +809,7 @@ def setup_method(self): cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func) - f = gp.marginal_likelihood("f", X, y, noise=0.0, is_observed=False, observed=y) + f = gp.marginal_likelihood("f", X, y, noise=0.0) p = gp.conditional("p", Xnew) self.logp = model.logp({"p": pnew}) self.X = X From c2b850d0097312cd7518645a7694ac0fa333f42e Mon Sep 17 00:00:00 2001 From: bwengals Date: Sat, 20 Nov 2021 14:26:23 -0800 Subject: [PATCH 31/32] remove is_observed usage from TestMarginalVsMarginalSparse --- pymc/tests/test_gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index a4dae696848..7ae9b23c866 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -894,7 +894,7 @@ def testPredictCov(self): cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx="DTC") - f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) + f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) From 461d26bc1828f19100b1b038d6b83c1f4d125989 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Sun, 21 Nov 2021 09:02:29 +0100 Subject: [PATCH 32/32] Update RELEASE-NOTES.md --- RELEASE-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index ae3d190cf91..f3035fe825e 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -53,7 +53,7 @@ All of the above apply to: - The `is_observed` arguement for `gp.Marginal*` implementations has been deprecated. - In the gp.utils file, the `kmeans_inducing_points` function now passes through `kmeans_kwargs` to scipy's k-means function. - The function `replace_with_values` function has been added to `gp.utils`. - - ... +- ... ### Expected breaks + New API was already available in `v3`.
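For completeness, a minimal end-to-end sketch of the workflow these final patches settle on: `marginal_likelihood` without the deprecated `is_observed` flag, and prediction through `predict`, which now evaluates its outputs with `gp.util.replace_with_values` given a `point` dictionary. This is an editor's illustration only; the data, priors, and the `point` entries are invented for the example.

import numpy as np
import pymc as pm

X = np.linspace(0, 1, 30)[:, None]
Xnew = np.linspace(-0.2, 1.2, 10)[:, None]
y = np.random.randn(30)

with pm.Model() as model:
    ell = pm.Gamma("ell", alpha=2, beta=1)
    cov_func = pm.gp.cov.ExpQuad(1, ls=ell)
    gp = pm.gp.Marginal(mean_func=pm.gp.mean.Zero(), cov_func=cov_func)
    f = gp.marginal_likelihood("f", X=X, y=y, noise=0.1)  # no is_observed argument

    # the free hyperparameter `ell` is filled in from `point` via replace_with_values
    mu, var = gp.predict(Xnew, point={"ell": 0.5}, diag=True)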