From 984a1d04bce65c07e399a4ca1676476da472f440 Mon Sep 17 00:00:00 2001
From: bwengals
Date: Tue, 5 Oct 2021 10:46:27 -0700
Subject: [PATCH 01/32] np.int -> int, fix np DeprecationWarning

---
 pymc/gp/cov.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py
index ccbbe59b81e..bbeeb41026f 100644
--- a/pymc/gp/cov.py
+++ b/pymc/gp/cov.py
@@ -64,7 +64,7 @@ def __init__(self, input_dim, active_dims=None):
         if active_dims is None:
             self.active_dims = np.arange(input_dim)
         else:
-            self.active_dims = np.asarray(active_dims, np.int)
+            self.active_dims = np.asarray(active_dims, int)
 
     def __call__(self, X, Xs=None, diag=False):
         r"""

From bd916ae1ad08a888d9589f12ce20ea92b8396f57 Mon Sep 17 00:00:00 2001
From: bwengals
Date: Wed, 6 Oct 2021 14:22:10 -0700
Subject: [PATCH 02/32] remove shape arg from non-kron implementations, TODO for is_observed in Marginal, mark for deprecation

---
 pymc/gp/gp.py | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py
index 9f913474d46..4fe44ab030f 100644
--- a/pymc/gp/gp.py
+++ b/pymc/gp/gp.py
@@ -223,8 +223,7 @@ def conditional(self, name, Xnew, given=None, **kwargs):
         """
         givens = self._get_given_vals(given)
         mu, cov = self._build_conditional(Xnew, *givens)
-        shape = infer_shape(Xnew, kwargs.pop("shape", None))
-        return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs)
+        return pm.MvNormal(name, mu=mu, cov=cov, **kwargs)
 
 
 @conditioned_vars(["X", "f", "nu"])
@@ -341,8 +340,7 @@ def conditional(self, name, Xnew, **kwargs):
         X = self.X
         f = self.f
         nu2, mu, cov = self._build_conditional(Xnew, X, f)
-        shape = infer_shape(Xnew, kwargs.pop("shape", None))
-        return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, size=shape, **kwargs)
+        return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, **kwargs)
 
 
 @conditioned_vars(["X", "y", "noise"])
@@ -422,9 +420,6 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
         noise: scalar, Variable, or Covariance
            Standard deviation of the Gaussian noise. Can also be a Covariance for
            non-white noise.
-        is_observed: bool
-            Whether to set `y` as an `observed` variable in the `model`.
-            Default is `True`.
        **kwargs
            Extra keyword arguments that are passed to `MvNormal` distribution
            constructor.
@@ -436,11 +431,12 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs):
         self.X = X
         self.y = y
         self.noise = noise
+        # TODO: deprecate is_observed. untested, shouldn't be used.
if is_observed: return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs) else: - shape = infer_shape(X, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + # shape = infer_shape(X, kwargs.pop("shape", None)) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def _get_given_vals(self, given): if given is None: @@ -517,8 +513,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): R""" @@ -829,8 +824,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @conditioned_vars(["Xs", "f"]) From 490311faf4a713b22590515843fcd187fc85450c Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 6 Oct 2021 14:27:20 -0700 Subject: [PATCH 03/32] np.int -> int in gp/util.py --- pymc/gp/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index f47331248c1..3c32ab3c392 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -48,7 +48,7 @@ def infer_shape(X, n_points=None): """ if n_points is None: try: - n_points = np.int(X.shape[0]) + n_points = int(X.shape[0]) except TypeError: raise TypeError("Cannot infer 'shape', provide as an argument") return n_points From a64975d90c281ea918b34a143b9db96c4b3efbe8 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 7 Oct 2021 13:43:16 -0700 Subject: [PATCH 04/32] force all mean_func, cov_func args to GP constructors to be required kwargs (often default zero mean_func is used) --- pymc/gp/gp.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 4fe44ab030f..58a73b96c7c 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -41,7 +41,7 @@ class Base: Base class. """ - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0)): + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): self.mean_func = mean_func self.cov_func = cov_func @@ -122,8 +122,8 @@ class Latent(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0)): - super().__init__(mean_func, cov_func) + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): + super().__init__(mean_func=mean_func, cov_func=cov_func) def _build_prior(self, name, X, reparameterize=True, **kwargs): mu = self.mean_func(X) @@ -256,11 +256,11 @@ class TP(Latent): Processes as Alternatives to Gaussian Processes. arXiv preprint arXiv:1402.4306. 
""" - def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), nu=None): + def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0), nu=None): if nu is None: raise ValueError("Student's T process requires a degrees of freedom parameter, 'nu'") self.nu = nu - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Student's T processes aren't additive") @@ -638,7 +638,7 @@ def __init__(self, mean_func=Zero(), cov_func=Constant(0.0), approx="FITC"): if approx not in self._available_approx: raise NotImplementedError(approx) self.approx = approx - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): # new_gp will default to FITC approx @@ -881,13 +881,13 @@ class LatentKron(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): + def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): try: self.cov_funcs = list(cov_funcs) except TypeError: self.cov_funcs = [cov_funcs] cov_func = pm.gp.cov.Kron(self.cov_funcs) - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") @@ -1037,13 +1037,13 @@ class MarginalKron(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def __init__(self, mean_func=Zero(), cov_funcs=(Constant(0.0))): + def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): try: self.cov_funcs = list(cov_funcs) except TypeError: self.cov_funcs = [cov_funcs] cov_func = pm.gp.cov.Kron(self.cov_funcs) - super().__init__(mean_func, cov_func) + super().__init__(mean_func=mean_func, cov_func=cov_func) def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") From 77c4392c9c45488a8b236625644f335c2a6ae127 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 20 Oct 2021 14:15:14 -0700 Subject: [PATCH 05/32] fix predictt functions, rename to _predict_at. because theano -> aesara --- pymc/gp/gp.py | 17 +++++++---------- pymc/gp/util.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 58a73b96c7c..f1fc4bf2d83 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -27,6 +27,7 @@ cholesky, conditioned_vars, infer_shape, + replace_with_value, solve_lower, solve_upper, stabilize, @@ -540,12 +541,10 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): if given is None: given = {} - mu, cov = self.predictt(Xnew, diag, pred_noise, given) - # XXX: This needs to be refactored - # return draw_values([mu, cov], point=point) - return None, None + mu, cov = self._predict_at(Xnew, diag, pred_noise, given) + return replace_with_values([mu, cov], replacements=point) - def predictt(self, Xnew, diag=False, pred_noise=False, given=None): + def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -1199,12 +1198,10 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False): Whether or not observation noise is included in the conditional. Default is `False`. 
""" - mu, cov = self._build_conditional(Xnew, pred_noise, diag) - # XXX: This needs to be refactored - # return draw_values([mu, cov], point=point) - return None, None + mu, cov = self._predict_at(Xnew, pred_noise, diag) + return replace_with_values([mu, cov], replacements=point) - def predictt(self, Xnew, diag=False, pred_noise=False): + def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 3c32ab3c392..29edfcf3901 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,9 +14,12 @@ import warnings +from typing import Dict + import aesara.tensor as at import numpy as np +from aesara.compile import SharedVariable from aesara.tensor.slinalg import ( # noqa: W0611; pylint: disable=unused-import cholesky, solve, @@ -30,6 +33,37 @@ from aesara.tensor.var import TensorConstant from scipy.cluster.vq import kmeans +from pymc.aesaraf import compile_rv_inplace, walk_model + + +def replace_with_value(vars_needed, replacements: Dict): + R""" + Replace random variable nodes in the graph with values given in replacements. + + NOTE TO REVIEWER: Modified this from `sample_posterior_predictive`. Is there a better way to do this? + """ + inputs, input_names = [], [] + for rv in walk_model(vars_needed, walk_past_rvs=True): + if rv in model.named_vars.values() and not isinstance(rv, SharedVariable): + inputs.append(rv) + input_names.append(rv.name) + + fn = compile_rv_inplace( + inputs, + vars_needed, + allow_input_downcast=True, + accept_inplace=True, + on_unused_input="ignore", + ) + + replacements = {name: val for name, val in replacements.items() if name in input_names} + missing = set(needed) - set(replacements.keys()) + if len(missing) > 0: + missing_str = ", ".join(missing) + raise ValueError(f"Values for {missing_str} must be included in `replacements`.") + + return fn(**replacements) + def infer_shape(X, n_points=None): R""" From d1e09fed62c323acb19c32a0c5b71e4473ef67e9 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 16:26:41 -0700 Subject: [PATCH 06/32] fix TP tests, force mean_func, cov_func to be req kwarg --- pymc/tests/test_gp.py | 44 +++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 424e87e5e8b..d57adbabccb 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -767,7 +767,7 @@ def test_raises3(self): B = pm.gp.cov.Coregion(1) -@pytest.mark.xfail(reason="MvNormal was not yet refactored") +# @pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestMarginalVsLatent: R""" Compare the logp of models Marginal, noise=0 and Latent. 
@@ -781,7 +781,7 @@ def setup_method(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Marginal(mean_func, cov_func) + gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func) f = gp.marginal_likelihood("f", X, y, noise=0.0, is_observed=False, observed=y) p = gp.conditional("p", Xnew) self.logp = model.logp({"p": pnew}) @@ -794,7 +794,7 @@ def testLatent1(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Latent(mean_func, cov_func) + gp = pm.gp.Latent(mean_func=mean_func, cov_func=cov_func) f = gp.prior("f", self.X, reparameterize=False) p = gp.conditional("p", self.Xnew) latent_logp = model.logp({"f": self.y, "p": self.pnew}) @@ -804,7 +804,7 @@ def testLatent2(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Latent(mean_func, cov_func) + gp = pm.gp.Latent(mean_func=mean_func, cov_func=cov_func) f = gp.prior("f", self.X, reparameterize=True) p = gp.conditional("p", self.Xnew) chol = np.linalg.cholesky(cov_func(self.X).eval()) @@ -828,7 +828,7 @@ def setup_method(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.Marginal(mean_func, cov_func) + gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func) sigma = 0.1 f = gp.marginal_likelihood("f", X, y, noise=sigma) p = gp.conditional("p", Xnew) @@ -845,7 +845,7 @@ def testApproximations(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx=approx) + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) p = gp.conditional("p", self.Xnew) approx_logp = model.logp({"f": self.y, "p": self.pnew}) @@ -856,7 +856,7 @@ def testPredictVar(self, approx): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx=approx) + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) mu1, var1 = self.gp.predict(self.Xnew, diag=True) mu2, var2 = gp.predict(self.Xnew, diag=True) @@ -867,7 +867,7 @@ def testPredictCov(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) - gp = pm.gp.MarginalSparse(mean_func, cov_func, approx="DTC") + gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx="DTC") f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) @@ -888,7 +888,7 @@ def setup_method(self): ) self.means = (pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5)) - @pytest.mark.xfail(reason="MvNormal was not yet refactored") + # @pytest.mark.xfail(reason="MvNormal was not yet refactored") def testAdditiveMarginal(self): with pm.Model() as model1: gp1 = pm.gp.Marginal(self.means[0], self.covs[0]) @@ -1007,7 +1007,7 @@ def testAdditiveTypeRaises2(self): gp1 + gp2 -@pytest.mark.xfail(reason="MvNormal was not yet refactored") +# @pytest.mark.xfail(reason="MvNormal was not yet 
refactored") class TestTP: R""" Compare TP with high degress of freedom to GP @@ -1015,9 +1015,9 @@ class TestTP: def setup_method(self): X = np.random.randn(20, 3) - y = np.random.randn(20) * 0.01 - Xnew = np.random.randn(50, 3) - pnew = np.random.randn(50) * 0.01 + y = np.random.randn(20) + Xnew = np.random.randn(30, 3) + pnew = np.random.randn(30) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = pm.gp.Latent(cov_func=cov_func) @@ -1027,29 +1027,29 @@ def setup_method(self): self.y = y self.Xnew = Xnew self.pnew = pnew - self.latent_logp = model.logp({"f": y, "p": pnew}) - self.plogp = p.logp({"f": y, "p": pnew}) + self.nu = 1000 + self.gp_latent_logp = model.logp({"f": y, "p": pnew}) def testTPvsLatent(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - tp = pm.gp.TP(cov_func=cov_func, nu=10000) + tp = pm.gp.TP(cov_func=cov_func, nu=self.nu) f = tp.prior("f", self.X, reparameterize=False) p = tp.conditional("p", self.Xnew) tp_logp = model.logp({"f": self.y, "p": self.pnew}) - npt.assert_allclose(self.latent_logp, tp_logp, atol=0, rtol=1e-2) + npt.assert_allclose(self.gp_latent_logp, tp_logp, atol=0, rtol=1e-2) def testTPvsLatentReparameterized(self): with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) - tp = pm.gp.TP(cov_func=cov_func, nu=10000) + tp = pm.gp.TP(cov_func=cov_func, nu=self.nu) f = tp.prior("f", self.X, reparameterize=True) p = tp.conditional("p", self.Xnew) chol = np.linalg.cholesky(cov_func(self.X).eval()) - y_rotated = np.linalg.solve(chol, self.y) - # testing full model logp unreliable due to introduction of f_chi2__log__ - plogp = p.logp({"f_rotated_": y_rotated, "p": self.pnew, "f_chi2__log__": np.log(1e20)}) - npt.assert_allclose(self.plogp, plogp, atol=0, rtol=1e-2) + f_rotated = np.linalg.solve(chol, self.y) + + tp_logp = model.logp({"f_rotated_": f_rotated, "p": self.pnew}) + npt.assert_allclose(self.gp_latent_logp, tp_logp, atol=0, rtol=1e-2) def testAdditiveTPRaises(self): with pm.Model() as model: From 31150c34fe0a6e849ddf69fb3fa65353baf06254 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 16:27:51 -0700 Subject: [PATCH 07/32] fix TP reparameterization to sample studentt instead of chi2/norm --- pymc/gp/gp.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index f1fc4bf2d83..1553a5e0cf8 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -26,7 +26,7 @@ from pymc.gp.util import ( cholesky, conditioned_vars, - infer_shape, + infer_size, replace_with_value, solve_lower, solve_upper, @@ -129,12 +129,13 @@ def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): def _build_prior(self, name, X, reparameterize=True, **kwargs): mu = self.mean_func(X) cov = stabilize(self.cov_func(X)) - shape = infer_shape(X, kwargs.pop("shape", None)) if reparameterize: - v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=shape, **kwargs) + size = infer_size(X, kwargs.pop("size", None)) + print("_build_prior:size", size) + v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=size, **kwargs) f = pm.Deterministic(name, mu + cholesky(cov).dot(v)) else: - f = pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + f = pm.MvNormal(name, mu=mu, cov=cov, **kwargs) return f def prior(self, name, X, reparameterize=True, **kwargs): @@ -269,13 +270,12 @@ def __add__(self, other): def _build_prior(self, name, X, reparameterize=True, **kwargs): mu = self.mean_func(X) cov = 
stabilize(self.cov_func(X)) - shape = infer_shape(X, kwargs.pop("shape", None)) if reparameterize: - chi2 = pm.ChiSquared(name + "_chi2_", self.nu) - v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=shape, **kwargs) - f = pm.Deterministic(name, (at.sqrt(self.nu) / chi2) * (mu + cholesky(cov).dot(v))) + size = infer_size(X, kwargs.pop("size", None)) + v = pm.StudentT(name + "_rotated_", mu=0.0, sigma=1.0, nu=self.nu, size=size, **kwargs) + f = pm.Deterministic(name, mu + cholesky(cov).dot(v)) else: - f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, size=shape, **kwargs) + f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, **kwargs) return f def prior(self, name, X, reparameterize=True, **kwargs): @@ -436,7 +436,7 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): if is_observed: return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs) else: - # shape = infer_shape(X, kwargs.pop("shape", None)) + # size = infer_size(X, kwargs.pop("size", None)) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def _get_given_vals(self, given): @@ -974,8 +974,8 @@ def conditional(self, name, Xnew, **kwargs): constructor. """ mu, cov = self._build_conditional(Xnew) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + size = infer_size(Xnew, kwargs.pop("size", None)) + return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) @conditioned_vars(["Xs", "y", "sigma"]) @@ -1098,8 +1098,8 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): if is_observed: return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, observed=y, **kwargs) else: - shape = np.prod([len(X) for X in Xs]) - return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=shape, **kwargs) + size = np.prod([len(X) for X in Xs]) + return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) def _build_conditional(self, Xnew, pred_noise, diag): Xs, y, sigma = self.Xs, self.y, self.sigma @@ -1175,8 +1175,8 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): constructor. """ mu, cov = self._build_conditional(Xnew, pred_noise, False) - shape = infer_shape(Xnew, kwargs.pop("shape", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=shape, **kwargs) + size = infer_size(Xnew, kwargs.pop("size", None)) + return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False): R""" From 03c2a13d7b58b6edd7da705c20746de2d24782e9 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 16:29:25 -0700 Subject: [PATCH 08/32] change naming shape->size where appropriate --- pymc/gp/util.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 29edfcf3901..a5e18af53f5 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -65,9 +65,9 @@ def replace_with_value(vars_needed, replacements: Dict): return fn(**replacements) -def infer_shape(X, n_points=None): +def infer_size(X, n_points=None): R""" - Maybe attempt to infer the shape of a Gaussian process input matrix. + Maybe attempt to infer the size, or N, of a Gaussian process input matrix. If a specific shape cannot be inferred, for instance if X is symbolic, then an error is raised. 
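A minimal usage sketch (an illustration, not one of the patches in this series): after the keyword-only constructor change in PATCH 04 and the infer_shape -> infer_size rename in PATCH 08, building a GP and querying the input size reads roughly as below. The data, lengthscale, and model here are invented for the example.

import numpy as np
import pymc as pm
from pymc.gp.util import infer_size

X = np.linspace(0, 10, 50)[:, None]  # 50 input points, 1 feature column

with pm.Model():
    cov_func = pm.gp.cov.ExpQuad(1, ls=1.0)
    # mean_func/cov_func must now be passed by keyword; positional use raises TypeError
    gp = pm.gp.Latent(cov_func=cov_func)
    f = gp.prior("f", X=X)

assert infer_size(X) == 50  # the helper formerly named infer_shape
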
From 4bcb448985470b4ae9eb5c1e73d0f39f2840c321 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 21:32:25 -0700 Subject: [PATCH 09/32] add deprecation warning for is_observed --- pymc/gp/gp.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 1553a5e0cf8..775d0288ffd 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -432,11 +432,14 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): self.X = X self.y = y self.noise = noise - # TODO: deprecate is_observed. untested, shouldnt be used. if is_observed: return pm.MvNormal(name, mu=mu, cov=cov, observed=y, **kwargs) else: - # size = infer_size(X, kwargs.pop("size", None)) + warnings.warn( + "The 'is_observed' argument has been deprecated. If the GP is " + "unobserved use gp.Latent instead.", + DeprecationWarning, + ) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def _get_given_vals(self, given): @@ -736,6 +739,11 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw **kwargs, ) else: + warnings.warn( + "The 'is_observed' argument has been deprecated. If the GP is " + "unobserved use gp.Latent instead.", + DeprecationWarning, + ) return pm.DensityDist( name, X, From 167cb4ea20703e1531447c6cfeccf7c61bbd3cf4 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 22:00:02 -0700 Subject: [PATCH 10/32] add jitter arg for covs headed for cholesky decomps, previously fixed at 1e-6. add deprecation warning for is_observed arg --- pymc/gp/gp.py | 120 +++++++++++++++++++++++++++++++----------------- pymc/gp/util.py | 8 ++-- 2 files changed, 83 insertions(+), 45 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 775d0288ffd..8b75bd0f2ec 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -33,6 +33,7 @@ stabilize, ) from pymc.math import cartesian, kron_diag, kron_dot, kron_solve_lower, kron_solve_upper +JITTER_DEFAULT = 1e-6 __all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] @@ -126,9 +127,9 @@ class Latent(Base): def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0)): super().__init__(mean_func=mean_func, cov_func=cov_func) - def _build_prior(self, name, X, reparameterize=True, **kwargs): + def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): mu = self.mean_func(X) - cov = stabilize(self.cov_func(X)) + cov = stabilize(self.cov_func(X), jitter) if reparameterize: size = infer_size(X, kwargs.pop("size", None)) print("_build_prior:size", size) @@ -138,7 +139,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs): f = pm.MvNormal(name, mu=mu, cov=cov, **kwargs) return f - def prior(self, name, X, reparameterize=True, **kwargs): + def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the GP prior distribution evaluated over the input locations `X`. @@ -159,11 +160,14 @@ def prior(self, name, X, reparameterize=True, **kwargs): reparameterize: bool Reparameterize the distribution by rotating the random variable by the Cholesky factor of the covariance matrix. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to distribution constructor. 
""" - f = self._build_prior(name, X, reparameterize, **kwargs) + f = self._build_prior(name, X, reparameterize, jitter, **kwargs) self.X = X self.f = f return f @@ -183,10 +187,10 @@ def _get_given_vals(self, given): X, f = self.X, self.f return X, f, cov_total, mean_total - def _build_conditional(self, Xnew, X, f, cov_total, mean_total): + def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) - L = cholesky(stabilize(Kxx)) + L = cholesky(stabilize(Kxx, jitter)) A = solve_lower(L, Kxs) v = solve_lower(L, f - mean_total(X)) mu = self.mean_func(Xnew) + at.dot(at.transpose(A), v) @@ -194,7 +198,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total): cov = Kss - at.dot(at.transpose(A), A) return mu, cov - def conditional(self, name, Xnew, given=None, **kwargs): + def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -219,12 +223,15 @@ def conditional(self, name, Xnew, given=None, **kwargs): Can optionally take as key value pairs: `X`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, *givens) + mu, cov = self._build_conditional(Xnew, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -267,9 +274,9 @@ def __init__(self, *, mean_func=Zero(), cov_func=Constant(0.0), nu=None): def __add__(self, other): raise TypeError("Student's T processes aren't additive") - def _build_prior(self, name, X, reparameterize=True, **kwargs): + def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): mu = self.mean_func(X) - cov = stabilize(self.cov_func(X)) + cov = stabilize(self.cov_func(X), jitter) if reparameterize: size = infer_size(X, kwargs.pop("size", None)) v = pm.StudentT(name + "_rotated_", mu=0.0, sigma=1.0, nu=self.nu, size=size, **kwargs) @@ -278,7 +285,7 @@ def _build_prior(self, name, X, reparameterize=True, **kwargs): f = pm.MvStudentT(name, nu=self.nu, mu=mu, cov=cov, **kwargs) return f - def prior(self, name, X, reparameterize=True, **kwargs): + def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the TP prior distribution evaluated over the input locations `X`. @@ -299,7 +306,7 @@ def prior(self, name, X, reparameterize=True, **kwargs): Extra keyword arguments that are passed to distribution constructor. """ - f = self._build_prior(name, X, reparameterize, **kwargs) + f = self._build_prior(name, X, reparameterize, jitter, **kwargs) self.X = X self.f = f return f @@ -318,7 +325,7 @@ def _build_conditional(self, Xnew, X, f): covT = (self.nu + beta - 2) / (nu2 - 2) * cov return nu2, mu, covT - def conditional(self, name, Xnew, **kwargs): + def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -333,6 +340,9 @@ def conditional(self, name, Xnew, **kwargs): Name of the random variable Xnew: array-like Function input values. 
+ jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -340,7 +350,7 @@ def conditional(self, name, Xnew, **kwargs): X = self.X f = self.f - nu2, mu, cov = self._build_conditional(Xnew, X, f) + nu2, mu, cov = self._build_conditional(Xnew, X, f, jitter) return pm.MvStudentT(name, nu=nu2, mu=mu, cov=cov, **kwargs) @@ -397,7 +407,7 @@ def _build_marginal_likelihood(self, X, noise): cov = Kxx + Knx return mu, cov - def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): + def marginal_likelihood(self, name, X, y, noise, jitter=JITTER_DEFAULT, is_observed=True, **kwargs): R""" Returns the marginal likelihood distribution, given the input locations `X` and the data `y`. @@ -421,6 +431,9 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): noise: scalar, Variable, or Covariance Standard deviation of the Gaussian noise. Can also be a Covariance for non-white noise. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -428,7 +441,7 @@ def marginal_likelihood(self, name, X, y, noise, is_observed=True, **kwargs): if not isinstance(noise, Covariance): noise = pm.gp.cov.WhiteNoise(noise) - mu, cov = self._build_marginal_likelihood(X, noise) + mu, cov = self._build_marginal_likelihood(X, noise, jitter) self.X = X self.y = y self.noise = noise @@ -460,12 +473,12 @@ def _get_given_vals(self, given): X, y, noise = self.X, self.y, self.noise return X, y, noise, cov_total, mean_total - def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total): + def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total, jitter): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) Knx = noise(X) rxx = y - mean_total(X) - L = cholesky(stabilize(Kxx) + Knx) + L = cholesky(stabilize(Kxx, jitter) + Knx) A = solve_lower(L, Kxs) v = solve_lower(L, rxx) mu = self.mean_func(Xnew) + at.dot(at.transpose(A), v) @@ -480,9 +493,9 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mea cov = Kss - at.dot(at.transpose(A), A) if pred_noise: cov += noise(Xnew) - return mu, cov if pred_noise else stabilize(cov) + return mu, cov if pred_noise else stabilize(cov, jitter) - def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -510,16 +523,19 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): Can optionally take as key value pairs: `X`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
""" givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) + mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): + def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -540,14 +556,17 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): Default is `False`. given: dict Same as `conditional` method. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. """ if given is None: given = {} - mu, cov = self._predict_at(Xnew, diag, pred_noise, given) + mu, cov = self._predict_at(Xnew, diag, pred_noise, given, jitter) return replace_with_values([mu, cov], replacements=point) - def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None): + def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -654,11 +673,11 @@ def __add__(self, other): # Use y as first argument, so that we can use functools.partial # in marginal_likelihood instead of lambda. This makes pickling # possible. - def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): + def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter): sigma2 = at.square(sigma) Kuu = self.cov_func(Xu) Kuf = self.cov_func(Xu, X) - Luu = cholesky(stabilize(Kuu)) + Luu = cholesky(stabilize(Kuu, jitter)) A = solve_lower(Luu, Kuf) Qffd = at.sum(A * A, 0) if self.approx == "FITC": @@ -683,7 +702,7 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma): quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) - def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kwargs): + def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise @@ -706,6 +725,9 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw is_observed: bool Whether to set `y` as an `observed` variable in the `model`. Default is `True`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
@@ -732,6 +754,7 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw X, Xu, self.sigma, + jitter, logp=self._build_marginal_likelihood_logp, observed=y, ndims_params=[2, 2, 0], @@ -749,6 +772,7 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw X, Xu, self.sigma, + jitter, logp=self._build_marginal_likelihood_logp, observed=y, ndims_params=[2, 2, 0], @@ -757,11 +781,11 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, **kw **kwargs, ) - def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total): + def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total, jitter): sigma2 = at.square(sigma) Kuu = cov_total(Xu) Kuf = cov_total(Xu, X) - Luu = cholesky(stabilize(Kuu)) + Luu = cholesky(stabilize(Kuu, jitter)) A = solve_lower(Luu, Kuf) Qffd = at.sum(A * A, 0) if self.approx == "FITC": @@ -788,7 +812,7 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, cov = self.cov_func(Xnew) - at.dot(at.transpose(As), As) + at.dot(at.transpose(C), C) if pred_noise: cov += sigma2 * at.identity_like(cov) - return mu, cov if pred_noise else stabilize(cov) + return mu, cov if pred_noise else stabilize(cov, jitter) def _get_given_vals(self, given): if given is None: @@ -805,7 +829,7 @@ def _get_given_vals(self, given): X, Xu, y, sigma = self.X, self.Xu, self.y, self.sigma return X, Xu, y, sigma, cov_total, mean_total - def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs): R""" Returns the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. @@ -824,13 +848,16 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, **kwargs): Can optionally take as key value pairs: `X`, `Xu`, `y`, `noise`, and `gp`. See the section in the documentation on additive GP models in PyMC for more information. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens) + mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -899,16 +926,16 @@ def __init__(self, *, mean_func=Zero(), cov_funcs=(Constant(0.0))): def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") - def _build_prior(self, name, Xs, **kwargs): + def _build_prior(self, name, Xs, jitter, **kwargs): self.N = np.prod([len(X) for X in Xs]) mu = self.mean_func(cartesian(*Xs)) - chols = [cholesky(stabilize(cov(X))) for cov, X in zip(self.cov_funcs, Xs)] + chols = [cholesky(stabilize(cov(X, jitter))) for cov, X in zip(self.cov_funcs, Xs)] # remove reparameterization option v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=self.N, **kwargs) f = pm.Deterministic(name, mu + at.flatten(kron_dot(chols, v))) return f - def prior(self, name, Xs, **kwargs): + def prior(self, name, Xs, jitter, **kwargs): """ Returns the prior distribution evaluated over the input locations `Xs`. @@ -922,22 +949,25 @@ def prior(self, name, Xs, **kwargs): must be passable to its respective covariance without error. 
The total covariance function is measured on the full grid `cartesian(*Xs)`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to the `KroneckerNormal` distribution constructor. """ if len(Xs) != len(self.cov_funcs): raise ValueError("Must provide a covariance function for each X") - f = self._build_prior(name, Xs, **kwargs) + f = self._build_prior(name, Xs, jitter, **kwargs) self.Xs = Xs self.f = f return f - def _build_conditional(self, Xnew): + def _build_conditional(self, Xnew, jitter): Xs, f = self.Xs, self.f X = cartesian(*Xs) delta = f - self.mean_func(X) - covs = [stabilize(cov(Xi)) for cov, Xi in zip(self.cov_funcs, Xs)] + covs = [stabilize(cov(Xi), jitter) for cov, Xi in zip(self.cov_funcs, Xs)] chols = [cholesky(cov) for cov in covs] cholTs = [at.transpose(chol) for chol in chols] Kss = self.cov_func(Xnew) @@ -947,10 +977,10 @@ def _build_conditional(self, Xnew): alpha = kron_solve_upper(cholTs, alpha) mu = at.dot(Ksx, alpha).ravel() + self.mean_func(Xnew) A = kron_solve_lower(chols, Kxs) - cov = stabilize(Kss - at.dot(at.transpose(A), A)) + cov = stabilize(Kss - at.dot(at.transpose(A), A), jitter) return mu, cov - def conditional(self, name, Xnew, **kwargs): + def conditional(self, name, Xnew, jitter, **kwargs): """ Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -977,11 +1007,14 @@ def conditional(self, name, Xnew, **kwargs): Xnew: array-like Function input values. If one-dimensional, must be a column vector with shape `(n, 1)`. + jitter: scalar + A small correction added to the diagonal of positive semi-definite + covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ - mu, cov = self._build_conditional(Xnew) + mu, cov = self._build_conditional(Xnew, jitter) size = infer_size(Xnew, kwargs.pop("size", None)) return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) @@ -1106,6 +1139,11 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): if is_observed: return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, observed=y, **kwargs) else: + warnings.warn( + "The 'is_observed' argument has been deprecated. If the GP is " + "unobserved use gp.LatentKron instead.", + DeprecationWarning, + ) size = np.prod([len(X) for X in Xs]) return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index a5e18af53f5..462f84355c8 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -88,22 +88,22 @@ def infer_size(X, n_points=None): return n_points -def stabilize(K, c=1e-6): +def stabilize(K, jitter=1e-6): R""" Adds small diagonal to a covariance matrix. Often the matrices calculated from covariance functions, `K = cov_func(X)` do not appear numerically to be positive semi-definite. Adding a small - correction, `c`, to the diagonal is usually enough to fix this. + correction, `jitter`, to the diagonal is usually enough to fix this. Parameters ---------- K: array-like A square covariance or kernel matrix. - c: float + jitter: float A small constant. 
""" - return K + c * at.identity_like(K) + return K + jitter * at.identity_like(K) def kmeans_inducing_points(n_inducing, X): From e18b8c47d5395a6a4c32a9f0061198bc8680c161 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 27 Oct 2021 22:11:34 -0700 Subject: [PATCH 11/32] clean up trivial aesara.function usage to .eval() instead --- pymc/tests/test_gp.py | 224 +++++++++++++++++++++--------------------- 1 file changed, 112 insertions(+), 112 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index d57adbabccb..297ab35df0c 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -34,7 +34,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: zero_mean = pm.gp.mean.Zero() - M = aesara.function([], zero_mean(X))() + M = zero_mean(X).eval() assert np.all(M == 0) assert M.shape == (10,) @@ -44,7 +44,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: const_mean = pm.gp.mean.Constant(6) - M = aesara.function([], const_mean(X))() + M = const_mean(X).eval() assert np.all(M == 6) assert M.shape == (10,) @@ -54,7 +54,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: linear_mean = pm.gp.mean.Linear(2, 0.5) - M = aesara.function([], linear_mean(X))() + M = linear_mean.eval() npt.assert_allclose(M[1], 0.7222, atol=1e-3) assert M.shape == (10,) @@ -66,7 +66,7 @@ def test_add(self): mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5) mean2 = pm.gp.mean.Constant(2) mean = mean1 + mean2 + mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 0.7222 + 2 + 2, atol=1e-3) def test_prod(self): @@ -75,7 +75,7 @@ def test_prod(self): mean1 = pm.gp.mean.Linear(coeffs=2, intercept=0.5) mean2 = pm.gp.mean.Constant(2) mean = mean1 * mean2 * mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 0.7222 * 2 * 2, atol=1e-3) def test_add_multid(self): @@ -86,7 +86,7 @@ def test_add_multid(self): mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b) mean2 = pm.gp.mean.Constant(2) mean = mean1 + mean2 + mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 10.8965 + 2 + 2, atol=1e-3) def test_prod_multid(self): @@ -97,7 +97,7 @@ def test_prod_multid(self): mean1 = pm.gp.mean.Linear(coeffs=A, intercept=b) mean2 = pm.gp.mean.Constant(2) mean = mean1 * mean2 * mean2 - M = aesara.function([], mean(X))() + M = mean(X).eval() npt.assert_allclose(M[1], 10.8965 * 2 * 2, atol=1e-3) @@ -108,10 +108,10 @@ def test_symadd_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 + cov2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightadd_scalar(self): @@ -119,10 +119,10 @@ def test_rightadd_scalar(self): with pm.Model() as model: a = 1 cov = pm.gp.cov.ExpQuad(1, 0.1) + a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftadd_scalar(self): @@ -130,10 +130,10 @@ def test_leftadd_scalar(self): with pm.Model() as model: a = 1 cov = a + pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1.53940, 
atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightadd_matrix(self): @@ -141,10 +141,10 @@ def test_rightadd_matrix(self): M = 2 * np.ones((10, 10)) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) + M - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftadd_matrixt(self): @@ -152,10 +152,10 @@ def test_leftadd_matrixt(self): M = 2 * at.ones((10, 10)) with pm.Model() as model: cov = M + pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_matrix(self): @@ -164,8 +164,8 @@ def test_leftprod_matrix(self): with pm.Model() as model: cov = M + pm.gp.cov.ExpQuad(1, 0.1) cov_true = pm.gp.cov.ExpQuad(1, 0.1) + M - K = aesara.function([], cov(X))() - K_true = aesara.function([], cov_true(X))() + K = cov(X).eval() + K_true = cov_true(X).eval() assert np.allclose(K, K_true) def test_inv_rightadd(self): @@ -181,10 +181,10 @@ def test_symprod_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 * cov2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightprod_scalar(self): @@ -192,10 +192,10 @@ def test_rightprod_scalar(self): with pm.Model() as model: a = 2 cov = pm.gp.cov.ExpQuad(1, 0.1) * a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_scalar(self): @@ -203,10 +203,10 @@ def test_leftprod_scalar(self): with pm.Model() as model: a = 2 cov = a * pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_rightprod_matrix(self): @@ -214,10 +214,10 @@ def test_rightprod_matrix(self): M = 2 * np.ones((10, 10)) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) * M - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_leftprod_matrix(self): @@ -226,8 +226,8 @@ def test_leftprod_matrix(self): with pm.Model() as model: cov = M * pm.gp.cov.ExpQuad(1, 0.1) cov_true = pm.gp.cov.ExpQuad(1, 0.1) * M - K = aesara.function([], cov(X))() - K_true = aesara.function([], cov_true(X))() + K = cov(X).eval() + K_true = cov_true(X).eval() assert np.allclose(K, K_true) def test_multiops(self): @@ -244,12 +244,12 @@ def test_multiops(self): + pm.gp.cov.ExpQuad(1, 0.1) + 3 ) - K1 = aesara.function([], cov1(X))() - K2 = aesara.function([], cov2(X))() + K1 = 
cov1(X).eval() + K2 = cov2(X).eval() assert np.allclose(K1, K2) # check diagonal - K1d = aesara.function([], cov1(X, diag=True))() - K2d = aesara.function([], cov2(X, diag=True))() + K1d = cov1(X, diag=True).eval() + K2d = cov2(X, diag=True).eval() npt.assert_allclose(np.diag(K1), K2d, atol=1e-5) npt.assert_allclose(np.diag(K2), K1d, atol=1e-5) @@ -265,10 +265,10 @@ def test_symexp_cov(self): with pm.Model() as model: cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov = cov1 ** 2 - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_numpy(self): @@ -276,10 +276,10 @@ def test_covexp_numpy(self): with pm.Model() as model: a = np.array([[2]]) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_aesara(self): @@ -287,10 +287,10 @@ def test_covexp_aesara(self): with pm.Model() as model: a = at.alloc(2.0, 1, 1) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_covexp_shared(self): @@ -298,10 +298,10 @@ def test_covexp_shared(self): with pm.Model() as model: a = aesara.shared(2.0) cov = pm.gp.cov.ExpQuad(1, 0.1) ** a - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_invalid_covexp(self): @@ -321,11 +321,11 @@ def test_symprod_cov(self): cov1 = pm.gp.cov.ExpQuad(1, 0.1) cov2 = pm.gp.cov.ExpQuad(1, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 1 * 0.53940, atol=1e-3) npt.assert_allclose(K[0, 11], 0.53940 * 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_multiops(self): @@ -342,8 +342,8 @@ def test_multiops(self): ) cov2 = pm.gp.cov.ExpQuad(1, 0.1) * pm.gp.cov.ExpQuad(2, 0.1) cov = pm.gp.cov.Kron([cov1, cov2]) - K_true = kronecker(aesara.function([], cov1(X1))(), aesara.function([], cov2(X2))()).eval() - K = aesara.function([], cov(X))() + K_true = kronecker(cov1(X1).eval(), cov2(X2).eval()).eval() + K = cov(X).eval() npt.assert_allclose(K_true, K) @@ -352,30 +352,30 @@ def test_slice1(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, 0.1, active_dims=[0, 0, 1]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.20084298, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_slice2(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, ls=[0.1, 0.1], active_dims=[1, 2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 
0.34295549, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_slice3(self): X = np.linspace(0, 1, 30).reshape(10, 3) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(3, ls=np.array([0.1, 0.1]), active_dims=[1, 2]) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.34295549, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_diffslice(self): @@ -384,10 +384,10 @@ def test_diffslice(self): cov = pm.gp.cov.ExpQuad(3, ls=0.1, active_dims=[1, 0, 0]) + pm.gp.cov.ExpQuad( 3, ls=[0.1, 0.2, 0.3] ) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.683572, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -402,7 +402,7 @@ def test_stable(self): X = np.random.uniform(low=320.0, high=400.0, size=[2000, 2]) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.1) - dists = aesara.function([], cov.square_dist(X, X))() + dists = cov.square_dist(X, X).eval() assert not np.any(dists < 0) @@ -411,44 +411,44 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_2d(self): X = np.linspace(0, 1, 10).reshape(5, 2) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, 0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.820754, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_2dard(self): X = np.linspace(0, 1, 10).reshape(5, 2) with pm.Model() as model: cov = pm.gp.cov.ExpQuad(2, np.array([1, 2])) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.969607, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_inv_lengthscale(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.ExpQuad(1, ls_inv=10) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.53940, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -457,14 +457,14 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.WhiteNoise(sigma=0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) npt.assert_allclose(K[0, 0], 0.5 ** 2, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) # check predict - K = aesara.function([], cov(X, X))() + K = cov(X, 
X).eval() npt.assert_allclose(K[0, 1], 0.0, atol=1e-3) # white noise predicting should return all zeros npt.assert_allclose(K[0, 0], 0.0, atol=1e-3) @@ -475,14 +475,14 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Constant(2.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 2.5, atol=1e-3) npt.assert_allclose(K[0, 0], 2.5, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 2.5, atol=1e-3) npt.assert_allclose(K[0, 0], 2.5, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -491,12 +491,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.RatQuad(1, ls=0.1, alpha=0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.66896, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -505,12 +505,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Exponential(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.57375, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -519,12 +519,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern52(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.46202, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -533,12 +533,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern32(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.42682, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -547,11 +547,11 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Matern12(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.32919, atol=1e-3) - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -560,12 +560,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Cosine(1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.766, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.766, atol=1e-3) # check 
diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -574,12 +574,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Periodic(1, 0.1, 0.1) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.00288, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -588,12 +588,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Linear(1, 0.5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.19444, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -602,12 +602,12 @@ def test_1d(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: cov = pm.gp.cov.Polynomial(1, 0.5, 2, 0) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.03780, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) @@ -621,12 +621,12 @@ def warp_func(x, a, b, c): with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) cov = pm.gp.cov.WarpedInput(1, warp_func=warp_func, args=(1, 10, 1), cov_func=cov_m52) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 0.79593, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -646,12 +646,12 @@ def tanh_func(x, x1, x2, w, x0): with pm.Model() as model: cov = pm.gp.cov.Gibbs(1, tanh_func, args=(0.05, 0.6, 0.4, 1.0)) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[2, 3], 0.136683, atol=1e-4) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -673,12 +673,12 @@ def scaling_func(x, a, b): with pm.Model() as model: cov_m52 = pm.gp.cov.Matern52(1, 0.2) cov = pm.gp.cov.ScaledCov(1, scaling_func=scaling_func, args=(2, -1), cov_func=cov_m52) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], 3.00686, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_raises(self): @@ -1210,12 +1210,12 @@ def test_1d_tau1(self): etalon = 0.600881 with pm.Model(): cov = pm.gp.cov.Circular(1, 1, tau=5) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - 
K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) def test_1d_tau2(self): @@ -1223,10 +1223,10 @@ def test_1d_tau2(self): etalon = 0.691239 with pm.Model(): cov = pm.gp.cov.Circular(1, 1, tau=4) - K = aesara.function([], cov(X))() + K = cov(X).eval() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = aesara.function([], cov(X, X))() + K = cov(X, X).eval() npt.assert_allclose(K[0, 1], etalon, atol=1e-3) # check diagonal - Kd = aesara.function([], cov(X, diag=True))() + Kd = cov(X, diag=True).eval() npt.assert_allclose(np.diag(K), Kd, atol=1e-5) From 212fb1ebcdfce85f06476c999af3d0ef577aab16 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 00:19:36 -0700 Subject: [PATCH 12/32] fix gp.util.replace_with_values to handle case with no symbolic values, .eval() works --- pymc/gp/util.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 462f84355c8..39fe62e9bea 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,7 +14,7 @@ import warnings -from typing import Dict +from typing import Dict, Optional import aesara.tensor as at import numpy as np @@ -36,11 +36,14 @@ from pymc.aesaraf import compile_rv_inplace, walk_model -def replace_with_value(vars_needed, replacements: Dict): +# TODO: add test +def replace_with_values(model, vars_needed, replacements=None): R""" - Replace random variable nodes in the graph with values given in replacements. - + Replace random variable nodes in the graph with values given by the replacements dict. + Uses untransformed versions of the inputs, performs some basic input validation. + NOTE TO REVIEWER: Modified this from `sample_posterior_predictive`. Is there a better way to do this? 
+ """ inputs, input_names = [], [] for rv in walk_model(vars_needed, walk_past_rvs=True): @@ -48,6 +51,10 @@ def replace_with_value(vars_needed, replacements: Dict): inputs.append(rv) input_names.append(rv.name) + # then deterministic, no inputs are required, can eval and return + if len(inputs) == 0: + return tuple([v.eval() for v in vars_needed]) + fn = compile_rv_inplace( inputs, vars_needed, @@ -57,9 +64,9 @@ def replace_with_value(vars_needed, replacements: Dict): ) replacements = {name: val for name, val in replacements.items() if name in input_names} - missing = set(needed) - set(replacements.keys()) + missing = set(vars_needed) - set(replacements.keys()) if len(missing) > 0: - missing_str = ", ".join(missing) + missing_str = ", ".join({m.name for m in missing}) raise ValueError(f"Values for {missing_str} must be included in `replacements`.") return fn(**replacements) From ad4a1cabeefc88f963054669aef6489357e26e56 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 00:20:52 -0700 Subject: [PATCH 13/32] jitter=0 for conditonals/predicts, fix replace_with_values calls --- pymc/gp/gp.py | 71 +++++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 8b75bd0f2ec..9e2a8d318a9 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -27,12 +27,15 @@ cholesky, conditioned_vars, infer_size, - replace_with_value, + replace_with_values, solve_lower, solve_upper, stabilize, ) from pymc.math import cartesian, kron_diag, kron_dot, kron_solve_lower, kron_solve_upper +from pymc.distributions.distribution import NoDistribution +from pymc.model import modelcontext + JITTER_DEFAULT = 1e-6 __all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] @@ -53,7 +56,7 @@ def __add__(self, other): raise TypeError("Cannot add different GP types") mean_total = self.mean_func + other.mean_func cov_total = self.cov_func + other.cov_func - return self.__class__(mean_total, cov_total) + return self.__class__(mean_func=mean_total, cov_func=cov_total) def prior(self, name, X, *args, **kwargs): raise NotImplementedError @@ -64,7 +67,7 @@ def marginal_likelihood(self, name, X, *args, **kwargs): def conditional(self, name, Xnew, *args, **kwargs): raise NotImplementedError - def predict(self, Xnew, point=None, given=None, diag=False): + def predict(self, Xnew, point=None, given=None, diag=False, model=None): raise NotImplementedError @@ -132,7 +135,6 @@ def _build_prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kw cov = stabilize(self.cov_func(X), jitter) if reparameterize: size = infer_size(X, kwargs.pop("size", None)) - print("_build_prior:size", size) v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=size, **kwargs) f = pm.Deterministic(name, mu + cholesky(cov).dot(v)) else: @@ -198,7 +200,7 @@ def _build_conditional(self, Xnew, X, f, cov_total, mean_total, jitter): cov = Kss - at.dot(at.transpose(A), A) return mu, cov - def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): + def conditional(self, name, Xnew, given=None, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -225,7 +227,8 @@ def conditional(self, name, Xnew, given=None, jitter=JITTER_DEFAULT, **kwargs): models in PyMC for more information. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. 
+ covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -311,11 +314,11 @@ def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): self.f = f return f - def _build_conditional(self, Xnew, X, f): + def _build_conditional(self, Xnew, X, f, jitter): Kxx = self.cov_func(X) Kxs = self.cov_func(X, Xnew) Kss = self.cov_func(Xnew) - L = cholesky(stabilize(Kxx)) + L = cholesky(stabilize(Kxx, jitter)) A = solve_lower(L, Kxs) cov = Kss - at.dot(at.transpose(A), A) v = solve_lower(L, f - self.mean_func(X)) @@ -325,7 +328,7 @@ def _build_conditional(self, Xnew, X, f): covT = (self.nu + beta - 2) / (nu2 - 2) * cov return nu2, mu, covT - def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): + def conditional(self, name, Xnew, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -342,7 +345,8 @@ def conditional(self, name, Xnew, jitter=JITTER_DEFAULT, **kwargs): Function input values. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -400,14 +404,14 @@ class Marginal(Base): fcond = gp.conditional("fcond", Xnew=Xnew) """ - def _build_marginal_likelihood(self, X, noise): + def _build_marginal_likelihood(self, X, noise, jitter): mu = self.mean_func(X) Kxx = self.cov_func(X) Knx = noise(X) cov = Kxx + Knx - return mu, cov + return mu, stabilize(cov, jitter) - def marginal_likelihood(self, name, X, y, noise, jitter=JITTER_DEFAULT, is_observed=True, **kwargs): + def marginal_likelihood(self, name, X, y, noise, jitter=0.0, is_observed=True, **kwargs): R""" Returns the marginal likelihood distribution, given the input locations `X` and the data `y`. @@ -433,7 +437,7 @@ def marginal_likelihood(self, name, X, y, noise, jitter=JITTER_DEFAULT, is_obser non-white noise. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -495,7 +499,7 @@ def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mea cov += noise(Xnew) return mu, cov if pred_noise else stabilize(cov, jitter) - def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kwargs): R""" Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -525,7 +529,8 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DE models in PyMC for more information. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. 
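For context, a minimal usage sketch of the `jitter` keyword described above (illustrative data, lengthscale, and noise level; assuming the post-patch `Marginal` API — this snippet is not part of the diff):

import numpy as np
import pymc as pm

X = np.linspace(0, 10, 50)[:, None]      # training inputs
y = np.random.randn(50)                  # toy observations
Xnew = np.linspace(0, 12, 20)[:, None]   # prediction inputs

with pm.Model() as model:
    cov = pm.gp.cov.ExpQuad(1, ls=0.2)
    gp = pm.gp.Marginal(cov_func=cov)
    # `jitter` on marginal_likelihood also defaults to 0.0 after this change
    f = gp.marginal_likelihood("f", X=X, y=y, noise=0.5)
    # conditionals default to jitter=0.0; pass a small value explicitly
    # if the predictive covariance needs extra numerical stabilization
    fcond = gp.conditional("fcond", Xnew=Xnew, jitter=1e-8)
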
@@ -535,7 +540,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DE mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): + def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=0.0, model=None): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -558,15 +563,16 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, ji Same as `conditional` method. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. """ if given is None: given = {} - + model = modelcontext(model) mu, cov = self._predict_at(Xnew, diag, pred_noise, given, jitter) - return replace_with_values([mu, cov], replacements=point) + return replace_with_values(model, [mu, cov], replacements=point) - def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=JITTER_DEFAULT): + def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=0.0): R""" Return the mean vector and covariance matrix of the conditional distribution as symbolic variables. @@ -586,7 +592,7 @@ def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=JIT Same as `conditional` method. """ givens = self._get_given_vals(given) - mu, cov = self._build_conditional(Xnew, pred_noise, diag, *givens) + mu, cov = self._build_conditional(Xnew, pred_noise, diag, *givens, jitter) return mu, cov @@ -702,7 +708,7 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter): quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) - def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitter=JITTER_DEFAULT, **kwargs): + def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitter=0.0, **kwargs): R""" Returns the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise @@ -727,7 +733,7 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitt Default is `True`. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -829,7 +835,7 @@ def _get_given_vals(self, given): X, Xu, y, sigma = self.X, self.Xu, self.y, self.sigma return X, Xu, y, sigma, cov_total, mean_total - def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DEFAULT, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kwargs): R""" Returns the approximate conditional distribution of the GP evaluated over new input locations `Xnew`. @@ -850,7 +856,8 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DE models in PyMC for more information. 
jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -859,7 +866,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=JITTER_DE givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - + @conditioned_vars(["Xs", "f"]) class LatentKron(Base): @@ -980,7 +987,7 @@ def _build_conditional(self, Xnew, jitter): cov = stabilize(Kss - at.dot(at.transpose(A), A), jitter) return mu, cov - def conditional(self, name, Xnew, jitter, **kwargs): + def conditional(self, name, Xnew, jitter=0.0, **kwargs): """ Returns the conditional distribution evaluated over new input locations `Xnew`. @@ -1009,7 +1016,8 @@ def conditional(self, name, Xnew, jitter, **kwargs): vector with shape `(n, 1)`. jitter: scalar A small correction added to the diagonal of positive semi-definite - covariance matrices to ensure numerical stability. Default value is 1e-6. + covariance matrices to ensure numerical stability. For conditionals + the default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution constructor. @@ -1224,7 +1232,7 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): size = infer_size(Xnew, kwargs.pop("size", None)) return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False): + def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -1244,8 +1252,9 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False): Whether or not observation noise is included in the conditional. Default is `False`. """ + model = modelcontext(model) mu, cov = self._predict_at(Xnew, pred_noise, diag) - return replace_with_values([mu, cov], replacements=point) + return replace_with_values(model, [mu, cov], replacements=point) def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" From a4a4f99b530f71eac7cddaab54c8dd443c5179a8 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 00:25:31 -0700 Subject: [PATCH 14/32] fix more tests - use model.logp instead of variable.logp - set req kwargs cov_func and mean_func - fix weirdly small scale on some input X, y - move predict calls into model block - the two kron models outstanding --- pymc/tests/test_gp.py | 81 ++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 44 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 297ab35df0c..abaac22b604 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -54,7 +54,7 @@ def test_value(self): X = np.linspace(0, 1, 10)[:, None] with pm.Model() as model: linear_mean = pm.gp.mean.Linear(2, 0.5) - M = linear_mean.eval() + M = linear_mean(X).eval() npt.assert_allclose(M[1], 0.7222, atol=1e-3) assert M.shape == (10,) @@ -767,17 +767,16 @@ def test_raises3(self): B = pm.gp.cov.Coregion(1) -# @pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestMarginalVsLatent: R""" Compare the logp of models Marginal, noise=0 and Latent. 
""" def setup_method(self): - X = np.random.randn(50, 3) - y = np.random.randn(50) * 0.01 - Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60) * 0.01 + X = np.random.randn(20, 3) + y = np.random.randn(20) + Xnew = np.random.randn(30, 3) + pnew = np.random.randn(30) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) @@ -813,7 +812,6 @@ def testLatent2(self): npt.assert_allclose(latent_logp, self.logp, atol=5) -@pytest.mark.xfail(reason="The `gp.predict` method was not yet refactored") class TestMarginalVsMarginalSparse: R""" Compare logp of models Marginal and MarginalSparse. @@ -822,9 +820,9 @@ class TestMarginalVsMarginalSparse: def setup_method(self): X = np.random.randn(50, 3) - y = np.random.randn(50) * 0.01 + y = np.random.randn(50) Xnew = np.random.randn(60, 3) - pnew = np.random.randn(60) * 0.01 + pnew = np.random.randn(60) with pm.Model() as model: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) @@ -858,8 +856,8 @@ def testPredictVar(self, approx): mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx=approx) f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) - mu1, var1 = self.gp.predict(self.Xnew, diag=True) - mu2, var2 = gp.predict(self.Xnew, diag=True) + mu1, var1 = self.gp.predict(self.Xnew, diag=True) + mu2, var2 = gp.predict(self.Xnew, diag=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) npt.assert_allclose(var1, var2, atol=0, rtol=1e-3) @@ -869,8 +867,8 @@ def testPredictCov(self): mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx="DTC") f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) - mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) - mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) + mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) + mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) npt.assert_allclose(cov1, cov2, atol=0, rtol=1e-3) @@ -878,7 +876,7 @@ def testPredictCov(self): class TestGPAdditive: def setup_method(self): self.X = np.random.randn(50, 3) - self.y = np.random.randn(50) * 0.01 + self.y = np.random.randn(50) self.Xnew = np.random.randn(60, 3) self.noise = pm.gp.cov.WhiteNoise(0.1) self.covs = ( @@ -888,19 +886,18 @@ def setup_method(self): ) self.means = (pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5), pm.gp.mean.Constant(0.5)) - # @pytest.mark.xfail(reason="MvNormal was not yet refactored") def testAdditiveMarginal(self): with pm.Model() as model1: - gp1 = pm.gp.Marginal(self.means[0], self.covs[0]) - gp2 = pm.gp.Marginal(self.means[1], self.covs[1]) - gp3 = pm.gp.Marginal(self.means[2], self.covs[2]) + gp1 = pm.gp.Marginal(mean_func=self.means[0], cov_func=self.covs[0]) + gp2 = pm.gp.Marginal(mean_func=self.means[1], cov_func=self.covs[1]) + gp3 = pm.gp.Marginal(mean_func=self.means[2], cov_func=self.covs[2]) gpsum = gp1 + gp2 + gp3 fsum = gpsum.marginal_likelihood("f", self.X, self.y, noise=self.noise) model1_logp = model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.Marginal(reduce(add, self.means), reduce(add, self.covs)) + gptot = pm.gp.Marginal(mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs)) fsum = gptot.marginal_likelihood("f", self.X, self.y, noise=self.noise) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, 
rtol=1e-2) @@ -913,17 +910,18 @@ def testAdditiveMarginal(self): fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose(fp1.logp({"fp1": fp}), fp2.logp({"fp2": fp}), atol=0, rtol=1e-2) + logp1 = model1.logp({"fp1": fp}) + logp2 = model2.logp({"fp2": fp}) + npt.assert_allclose(logp1, logp2, atol=0, rtol=1e-2) - @pytest.mark.xfail(reason="MarginalSparse must be refactored to not use DensityDist") @pytest.mark.parametrize("approx", ["FITC", "VFE", "DTC"]) def testAdditiveMarginalSparse(self, approx): Xu = np.random.randn(10, 3) sigma = 0.1 with pm.Model() as model1: - gp1 = pm.gp.MarginalSparse(self.means[0], self.covs[0], approx=approx) - gp2 = pm.gp.MarginalSparse(self.means[1], self.covs[1], approx=approx) - gp3 = pm.gp.MarginalSparse(self.means[2], self.covs[2], approx=approx) + gp1 = pm.gp.MarginalSparse(mean_func=self.means[0], cov_func=self.covs[0], approx=approx) + gp2 = pm.gp.MarginalSparse(mean_func=self.means[1], cov_func=self.covs[1], approx=approx) + gp3 = pm.gp.MarginalSparse(mean_func=self.means[2], cov_func=self.covs[2], approx=approx) gpsum = gp1 + gp2 + gp3 fsum = gpsum.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) @@ -946,24 +944,23 @@ def testAdditiveMarginalSparse(self, approx): with model2: fp2 = gptot.conditional("fp2", self.Xnew) - fp = np.random.randn(self.Xnew.shape[0], 1) + fp = np.random.randn(self.Xnew.shape[0]) npt.assert_allclose( pm.logp(fp1, fp1).eval({fp1: fp}), pm.logp(fp2, fp2).eval({fp2: fp}), atol=0, rtol=1e-2 ) - @pytest.mark.xfail(reason="MvNormal was not yet refactored") def testAdditiveLatent(self): with pm.Model() as model1: - gp1 = pm.gp.Latent(self.means[0], self.covs[0]) - gp2 = pm.gp.Latent(self.means[1], self.covs[1]) - gp3 = pm.gp.Latent(self.means[2], self.covs[2]) + gp1 = pm.gp.Latent(mean_func=self.means[0], cov_func=self.covs[0]) + gp2 = pm.gp.Latent(mean_func=self.means[1], cov_func=self.covs[1]) + gp3 = pm.gp.Latent(mean_func=self.means[2], cov_func=self.covs[2]) gpsum = gp1 + gp2 + gp3 fsum = gpsum.prior("fsum", self.X, reparameterize=False) model1_logp = model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.Latent(reduce(add, self.means), reduce(add, self.covs)) + gptot = pm.gp.Latent(mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs)) fsum = gptot.prior("fsum", self.X, reparameterize=False) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) @@ -974,13 +971,10 @@ def testAdditiveLatent(self): fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose( - fp1.logp({"fsum": self.y, "fp1": fp}), - fp2.logp({"fsum": self.y, "fp2": fp}), - atol=0, - rtol=1e-2, - ) - + logp1 = model1.logp({"fsum": self.y, "fp1": fp}) + logp2 = model2.logp({"fsum": self.y, "fp2": fp}) + npt.assert_allclose(logp1, logp2, atol=0, rtol=1e-2) + def testAdditiveSparseRaises(self): # cant add different approximations with pm.Model() as model: @@ -1007,7 +1001,6 @@ def testAdditiveTypeRaises2(self): gp1 + gp2 -# @pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestTP: R""" Compare TP with high degress of freedom to GP @@ -1018,17 +1011,18 @@ def setup_method(self): y = np.random.randn(20) Xnew = np.random.randn(30, 3) pnew = np.random.randn(30) - with pm.Model() as model: + + with pm.Model() as model1: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = pm.gp.Latent(cov_func=cov_func) f = gp.prior("f", X, reparameterize=False) p = gp.conditional("p", 
Xnew) + self.gp_latent_logp = model1.logp({"f": y, "p": pnew}) self.X = X self.y = y self.Xnew = Xnew self.pnew = pnew - self.nu = 1000 - self.gp_latent_logp = model.logp({"f": y, "p": pnew}) + self.nu = 10000 def testTPvsLatent(self): with pm.Model() as model: @@ -1047,7 +1041,6 @@ def testTPvsLatentReparameterized(self): p = tp.conditional("p", self.Xnew) chol = np.linalg.cholesky(cov_func(self.X).eval()) f_rotated = np.linalg.solve(chol, self.y) - tp_logp = model.logp({"f_rotated_": f_rotated, "p": self.pnew}) npt.assert_allclose(self.gp_latent_logp, tp_logp, atol=0, rtol=1e-2) @@ -1077,7 +1070,7 @@ def setup_method(self): self.y = np.random.randn(self.N) * 0.1 self.Xnews = (np.random.randn(5, 1), np.random.randn(5, 1), np.random.randn(5, 1)) self.Xnew = np.concatenate(self.Xnews, axis=1) - self.pnew = np.random.randn(len(self.Xnew)) * 0.01 + self.pnew = np.random.randn(len(self.Xnew)) ls = 0.2 with pm.Model() as latent_model: self.cov_funcs = ( @@ -1134,7 +1127,7 @@ def setup_method(self): self.Xnews = (np.random.randn(5, 1), np.random.randn(5, 1), np.random.randn(5, 1)) self.Xnew = np.concatenate(self.Xnews, axis=1) self.sigma = 0.2 - self.pnew = np.random.randn(len(self.Xnew)) * 0.01 + self.pnew = np.random.randn(len(self.Xnew)) ls = 0.2 with pm.Model() as model: self.cov_funcs = [ From 9b26ca18eeb6bc932f1e001fe4a4929371c685ec Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 00:50:25 -0700 Subject: [PATCH 15/32] black stuff --- pymc/tests/test_gp.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index abaac22b604..5e4c0febf43 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -897,7 +897,9 @@ def testAdditiveMarginal(self): model1_logp = model1.logp({"fsum": self.y}) with pm.Model() as model2: - gptot = pm.gp.Marginal(mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs)) + gptot = pm.gp.Marginal( + mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs) + ) fsum = gptot.marginal_likelihood("f", self.X, self.y, noise=self.noise) model2_logp = model2.logp({"fsum": self.y}) npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) @@ -919,9 +921,15 @@ def testAdditiveMarginalSparse(self, approx): Xu = np.random.randn(10, 3) sigma = 0.1 with pm.Model() as model1: - gp1 = pm.gp.MarginalSparse(mean_func=self.means[0], cov_func=self.covs[0], approx=approx) - gp2 = pm.gp.MarginalSparse(mean_func=self.means[1], cov_func=self.covs[1], approx=approx) - gp3 = pm.gp.MarginalSparse(mean_func=self.means[2], cov_func=self.covs[2], approx=approx) + gp1 = pm.gp.MarginalSparse( + mean_func=self.means[0], cov_func=self.covs[0], approx=approx + ) + gp2 = pm.gp.MarginalSparse( + mean_func=self.means[1], cov_func=self.covs[1], approx=approx + ) + gp3 = pm.gp.MarginalSparse( + mean_func=self.means[2], cov_func=self.covs[2], approx=approx + ) gpsum = gp1 + gp2 + gp3 fsum = gpsum.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) @@ -974,7 +982,7 @@ def testAdditiveLatent(self): logp1 = model1.logp({"fsum": self.y, "fp1": fp}) logp2 = model2.logp({"fsum": self.y, "fp2": fp}) npt.assert_allclose(logp1, logp2, atol=0, rtol=1e-2) - + def testAdditiveSparseRaises(self): # cant add different approximations with pm.Model() as model: @@ -1011,7 +1019,7 @@ def setup_method(self): y = np.random.randn(20) Xnew = np.random.randn(30, 3) pnew = np.random.randn(30) - + with pm.Model() as model1: cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) gp = 
pm.gp.Latent(cov_func=cov_func) From 76b6c16586313cc8267f5f4f5b476d51aa5fa4a2 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 12:51:38 -0700 Subject: [PATCH 16/32] small fixes to get kronlatent and kronmarginal to pass --- pymc/gp/gp.py | 61 +++++++++++++++++++++++-------------------- pymc/gp/util.py | 12 +++++---- pymc/tests/test_gp.py | 22 ++++++++++------ 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 9e2a8d318a9..a0f2beaa281 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -21,9 +21,11 @@ import pymc as pm +from pymc.distributions.distribution import NoDistribution from pymc.gp.cov import Constant, Covariance from pymc.gp.mean import Zero from pymc.gp.util import ( + JITTER_DEFAULT, cholesky, conditioned_vars, infer_size, @@ -33,11 +35,8 @@ stabilize, ) from pymc.math import cartesian, kron_diag, kron_dot, kron_solve_lower, kron_solve_upper -from pymc.distributions.distribution import NoDistribution from pymc.model import modelcontext -JITTER_DEFAULT = 1e-6 - __all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] @@ -163,7 +162,7 @@ def prior(self, name, X, reparameterize=True, jitter=JITTER_DEFAULT, **kwargs): Reparameterize the distribution by rotating the random variable by the Cholesky factor of the covariance matrix. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to distribution constructor. @@ -226,7 +225,7 @@ def conditional(self, name, Xnew, given=None, jitter=0.0, **kwargs): and `gp`. See the section in the documentation on additive GP models in PyMC for more information. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. **kwargs @@ -344,7 +343,7 @@ def conditional(self, name, Xnew, jitter=0.0, **kwargs): Xnew: array-like Function input values. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. **kwargs @@ -436,7 +435,7 @@ def marginal_likelihood(self, name, X, y, noise, jitter=0.0, is_observed=True, * Standard deviation of the Gaussian noise. Can also be a Covariance for non-white noise. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution @@ -477,7 +476,9 @@ def _get_given_vals(self, given): X, y, noise = self.X, self.y, self.noise return X, y, noise, cov_total, mean_total - def _build_conditional(self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total, jitter): + def _build_conditional( + self, Xnew, pred_noise, diag, X, y, noise, cov_total, mean_total, jitter + ): Kxx = cov_total(X) Kxs = self.cov_func(X, Xnew) Knx = noise(X) @@ -528,7 +529,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kw and `gp`. 
See the section in the documentation on additive GP models in PyMC for more information. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. **kwargs @@ -540,7 +541,9 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kw mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=0.0, model=None): + def predict( + self, Xnew, point=None, diag=False, pred_noise=False, given=None, jitter=0.0, model=None + ): R""" Return the mean vector and covariance matrix of the conditional distribution as numpy arrays, given a `point`, such as the MAP @@ -562,7 +565,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None, ji given: dict Same as `conditional` method. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. """ @@ -708,7 +711,9 @@ def _build_marginal_likelihood_logp(self, y, X, Xu, sigma, jitter): quadratic = 0.5 * (at.dot(r, r_l) - at.dot(c, c)) return -1.0 * (constant + logdet + quadratic + trace) - def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitter=0.0, **kwargs): + def marginal_likelihood( + self, name, X, Xu, y, noise=None, is_observed=True, jitter=0.0, **kwargs + ): R""" Returns the approximate marginal likelihood distribution, given the input locations `X`, inducing point locations `Xu`, data `y`, and white noise @@ -732,7 +737,7 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitt Whether to set `y` as an `observed` variable in the `model`. Default is `True`. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. Default value is 0.0. **kwargs Extra keyword arguments that are passed to `MvNormal` distribution @@ -787,7 +792,9 @@ def marginal_likelihood(self, name, X, Xu, y, noise=None, is_observed=True, jitt **kwargs, ) - def _build_conditional(self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total, jitter): + def _build_conditional( + self, Xnew, pred_noise, diag, X, Xu, y, sigma, cov_total, mean_total, jitter + ): sigma2 = at.square(sigma) Kuu = cov_total(Xu) Kuf = cov_total(Xu, X) @@ -855,7 +862,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kw and `gp`. See the section in the documentation on additive GP models in PyMC for more information. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. 
**kwargs @@ -866,7 +873,7 @@ def conditional(self, name, Xnew, pred_noise=False, given=None, jitter=0.0, **kw givens = self._get_given_vals(given) mu, cov = self._build_conditional(Xnew, pred_noise, False, *givens, jitter) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) - + @conditioned_vars(["Xs", "f"]) class LatentKron(Base): @@ -934,15 +941,14 @@ def __add__(self, other): raise TypeError("Additive, Kronecker-structured processes not implemented") def _build_prior(self, name, Xs, jitter, **kwargs): - self.N = np.prod([len(X) for X in Xs]) + self.N = int(np.prod([len(X) for X in Xs])) mu = self.mean_func(cartesian(*Xs)) - chols = [cholesky(stabilize(cov(X, jitter))) for cov, X in zip(self.cov_funcs, Xs)] - # remove reparameterization option + chols = [cholesky(stabilize(cov(X), jitter)) for cov, X in zip(self.cov_funcs, Xs)] v = pm.Normal(name + "_rotated_", mu=0.0, sigma=1.0, size=self.N, **kwargs) f = pm.Deterministic(name, mu + at.flatten(kron_dot(chols, v))) return f - def prior(self, name, Xs, jitter, **kwargs): + def prior(self, name, Xs, jitter=JITTER_DEFAULT, **kwargs): """ Returns the prior distribution evaluated over the input locations `Xs`. @@ -957,7 +963,7 @@ def prior(self, name, Xs, jitter, **kwargs): total covariance function is measured on the full grid `cartesian(*Xs)`. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. Default value is 1e-6. **kwargs Extra keyword arguments that are passed to the `KroneckerNormal` @@ -965,6 +971,7 @@ def prior(self, name, Xs, jitter, **kwargs): """ if len(Xs) != len(self.cov_funcs): raise ValueError("Must provide a covariance function for each X") + f = self._build_prior(name, Xs, jitter, **kwargs) self.Xs = Xs self.f = f @@ -1015,7 +1022,7 @@ def conditional(self, name, Xnew, jitter=0.0, **kwargs): Function input values. If one-dimensional, must be a column vector with shape `(n, 1)`. jitter: scalar - A small correction added to the diagonal of positive semi-definite + A small correction added to the diagonal of positive semi-definite covariance matrices to ensure numerical stability. For conditionals the default value is 0.0. **kwargs @@ -1023,8 +1030,7 @@ def conditional(self, name, Xnew, jitter=0.0, **kwargs): constructor. """ mu, cov = self._build_conditional(Xnew, jitter) - size = infer_size(Xnew, kwargs.pop("size", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @conditioned_vars(["Xs", "y", "sigma"]) @@ -1103,7 +1109,7 @@ def _build_marginal_likelihood(self, Xs): return mu, covs def _check_inputs(self, Xs, y): - N = np.prod([len(X) for X in Xs]) + N = int(np.prod([len(X) for X in Xs])) if len(Xs) != len(self.cov_funcs): raise ValueError("Must provide a covariance function for each X") if N != len(y): @@ -1152,7 +1158,7 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): "unobserved use gp.LatentKron instead.", DeprecationWarning, ) - size = np.prod([len(X) for X in Xs]) + size = int(np.prod([len(X) for X in Xs])) return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) def _build_conditional(self, Xnew, pred_noise, diag): @@ -1229,8 +1235,7 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): constructor. 
""" mu, cov = self._build_conditional(Xnew, pred_noise, False) - size = infer_size(Xnew, kwargs.pop("size", None)) - return pm.MvNormal(name, mu=mu, cov=cov, size=size, **kwargs) + return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): R""" diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 39fe62e9bea..5b1efb7a695 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -35,15 +35,17 @@ from pymc.aesaraf import compile_rv_inplace, walk_model +JITTER_DEFAULT = 1e-6 + # TODO: add test def replace_with_values(model, vars_needed, replacements=None): R""" Replace random variable nodes in the graph with values given by the replacements dict. Uses untransformed versions of the inputs, performs some basic input validation. - + NOTE TO REVIEWER: Modified this from `sample_posterior_predictive`. Is there a better way to do this? - + """ inputs, input_names = [], [] for rv in walk_model(vars_needed, walk_past_rvs=True): @@ -53,8 +55,8 @@ def replace_with_values(model, vars_needed, replacements=None): # then deterministic, no inputs are required, can eval and return if len(inputs) == 0: - return tuple([v.eval() for v in vars_needed]) - + return tuple(v.eval() for v in vars_needed) + fn = compile_rv_inplace( inputs, vars_needed, @@ -95,7 +97,7 @@ def infer_size(X, n_points=None): return n_points -def stabilize(K, jitter=1e-6): +def stabilize(K, jitter=JITTER_DEFAULT): R""" Adds small diagonal to a covariance matrix. diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 5e4c0febf43..232121cbe9f 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -937,7 +937,7 @@ def testAdditiveMarginalSparse(self, approx): with pm.Model() as model2: gptot = pm.gp.MarginalSparse( - reduce(add, self.means), reduce(add, self.covs), approx=approx + mean_func=reduce(add, self.means), cov_func=reduce(add, self.covs), approx=approx ) fsum = gptot.marginal_likelihood("f", self.X, Xu, self.y, noise=sigma) model2_logp = model2.logp({"fsum": self.y}) @@ -953,9 +953,11 @@ def testAdditiveMarginalSparse(self, approx): fp2 = gptot.conditional("fp2", self.Xnew) fp = np.random.randn(self.Xnew.shape[0]) - npt.assert_allclose( - pm.logp(fp1, fp1).eval({fp1: fp}), pm.logp(fp2, fp2).eval({fp2: fp}), atol=0, rtol=1e-2 - ) + + model1_logp = model1.logp({"fp1": fp, "fsum": self.y}) + model2_logp = model2.logp({"fp2": fp, "fsum": self.y}) + + npt.assert_allclose(model1_logp, model2_logp, atol=0, rtol=1e-2) def testAdditiveLatent(self): with pm.Model() as model1: @@ -1061,7 +1063,6 @@ def testAdditiveTPRaises(self): gp1 + gp2 -@pytest.mark.xfail(reason="MvNormal was not yet refactored") class TestLatentKron: """ Compare gp.LatentKron to gp.Latent, both with Gaussian noise. @@ -1117,7 +1118,6 @@ def testLatentKronRaisesSizes(self): gp.prior("f", Xs=[np.linspace(0, 1, 7)[:, None], np.linspace(0, 1, 5)[:, None]]) -@pytest.mark.xfail(reason="The `gp.predict` method was not yet refactored") class TestMarginalKron: """ Compare gp.MarginalKron to gp.Marginal. 
@@ -1136,6 +1136,10 @@ def setup_method(self): self.Xnew = np.concatenate(self.Xnews, axis=1) self.sigma = 0.2 self.pnew = np.random.randn(len(self.Xnew)) + + print("pnew", self.pnew) + print("y", self.y) + ls = 0.2 with pm.Model() as model: self.cov_funcs = [ @@ -1154,7 +1158,8 @@ def setup_method(self): def testMarginalKronvsMarginalpredict(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + # f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) mu, cov = kron_gp.predict(self.Xnew) npt.assert_allclose(mu, self.mu, atol=1e-5, rtol=1e-2) @@ -1163,7 +1168,8 @@ def testMarginalKronvsMarginalpredict(self): def testMarginalKronvsMarginal(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + # f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) + f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) kron_logp = kron_model.logp({"p": self.pnew}) npt.assert_allclose(kron_logp, self.logp, atol=0, rtol=1e-2) From 90ae450fcd6b73a2d19ba357b9258fc716d710d6 Mon Sep 17 00:00:00 2001 From: bwengals Date: Thu, 28 Oct 2021 13:08:22 -0700 Subject: [PATCH 17/32] remove leftover prints --- pymc/tests/test_gp.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 232121cbe9f..22f22792aae 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -1137,9 +1137,6 @@ def setup_method(self): self.sigma = 0.2 self.pnew = np.random.randn(len(self.Xnew)) - print("pnew", self.pnew) - print("y", self.y) - ls = 0.2 with pm.Model() as model: self.cov_funcs = [ From df1fe573d15370f2a95b18f09c34c038fb230497 Mon Sep 17 00:00:00 2001 From: bwengals Date: Fri, 29 Oct 2021 19:41:13 -0700 Subject: [PATCH 18/32] dep warning -> future warning --- pymc/gp/gp.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index a0f2beaa281..f3f72cb0725 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -454,7 +454,7 @@ def marginal_likelihood(self, name, X, y, noise, jitter=0.0, is_observed=True, * warnings.warn( "The 'is_observed' argument has been deprecated. If the GP is " "unobserved use gp.Latent instead.", - DeprecationWarning, + FutureWarning, ) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) @@ -776,7 +776,7 @@ def marginal_likelihood( warnings.warn( "The 'is_observed' argument has been deprecated. If the GP is " "unobserved use gp.Latent instead.", - DeprecationWarning, + FutureWarning, ) return pm.DensityDist( name, @@ -1156,7 +1156,7 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): warnings.warn( "The 'is_observed' argument has been deprecated. 
If the GP is " "unobserved use gp.LatentKron instead.", - DeprecationWarning, + FutureWarning, ) size = int(np.prod([len(X) for X in Xs])) return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) From dceabf8a448f4e8e88ef4b6e214cc8c4917cc49b Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Tue, 2 Nov 2021 18:47:53 -0700 Subject: [PATCH 19/32] roll back mkl and mkl-service version --- conda-envs/windows-environment-dev-py38.yml | 2 ++ conda-envs/windows-environment-test-py38.yml | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/conda-envs/windows-environment-dev-py38.yml b/conda-envs/windows-environment-dev-py38.yml index 2f143a72af0..9dda7976d14 100644 --- a/conda-envs/windows-environment-dev-py38.yml +++ b/conda-envs/windows-environment-dev-py38.yml @@ -11,6 +11,8 @@ dependencies: - cloudpickle - fastprogress>=0.2.0 - h5py +- mkl==2020.4 +- mkl-service==2.3.0 - numpy>=1.15.0 - pandas - pip diff --git a/conda-envs/windows-environment-test-py38.yml b/conda-envs/windows-environment-test-py38.yml index 490e402c632..877b849882e 100644 --- a/conda-envs/windows-environment-test-py38.yml +++ b/conda-envs/windows-environment-test-py38.yml @@ -12,7 +12,8 @@ dependencies: - fastprogress>=0.2.0 - h5py>=2.7 - libpython -- mkl-service +- mkl==2020.4 +- mkl-service==2.3.0 - m2w64-toolchain - numpy>=1.15.0 - pandas From 4692407efcef912a5a7bc1c7d5bc976efd2262b2 Mon Sep 17 00:00:00 2001 From: bwengals Date: Tue, 2 Nov 2021 19:26:20 -0700 Subject: [PATCH 20/32] fix precommit --- pymc/gp/gp.py | 1 + pymc/gp/util.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index f3f72cb0725..65c7841cc4a 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -21,6 +21,7 @@ import pymc as pm +# Avoid circular dependency from pymc.distributions.distribution import NoDistribution from pymc.gp.cov import Constant, Covariance from pymc.gp.mean import Zero diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 5b1efb7a695..4dd9d9b7954 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,7 +14,7 @@ import warnings -from typing import Dict, Optional +from typing import Dict import aesara.tensor as at import numpy as np From cc16bd530b123927168d2e83441db8c6400849d3 Mon Sep 17 00:00:00 2001 From: bwengals Date: Tue, 2 Nov 2021 19:28:46 -0700 Subject: [PATCH 21/32] remove old DeprecationWarning --- pymc/gp/gp.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 65c7841cc4a..475e7314059 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -748,18 +748,12 @@ def marginal_likelihood( self.X = X self.Xu = Xu self.y = y + if noise is None: - sigma = kwargs.get("sigma") - if sigma is None: - raise ValueError("noise argument must be specified") - else: - self.sigma = sigma - warnings.warn( - "The 'sigma' argument has been deprecated. 
Use 'noise' instead.", - DeprecationWarning, - ) + raise ValueError("noise argument must be specified") else: self.sigma = noise + if is_observed: return pm.DensityDist( name, From 9763b98fcdb5d748a993e50286c9ebfa2497d41f Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 3 Nov 2021 16:53:11 -0700 Subject: [PATCH 22/32] improve tests cleanup gp.util.replace_with_values and add docstrings --- pymc/gp/cov.py | 2 +- pymc/gp/gp.py | 16 +++--- pymc/gp/util.py | 31 ++++++++--- pymc/tests/test_gp.py | 117 +++++++++++++++++++++++++++++++----------- 4 files changed, 118 insertions(+), 48 deletions(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index bbeeb41026f..f5dc1c9a13b 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -152,7 +152,7 @@ def __array_wrap__(self, result): elif isinstance(result[0][0], Prod): return result[0][0].factor_list[0] * A else: - raise RuntimeError + raise TypeError(f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`.") class Combination(Covariance): diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 475e7314059..4f762ecb664 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -1109,9 +1109,7 @@ def _check_inputs(self, Xs, y): raise ValueError("Must provide a covariance function for each X") if N != len(y): raise ValueError( - ( - "Length of y ({}) must match length of cartesian" "cartesian product of Xs ({})" - ).format(len(y), N) + f"Length of y ({len(y)}) must match length of cartesian product of Xs ({N})" ) def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): @@ -1156,7 +1154,7 @@ def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): size = int(np.prod([len(X) for X in Xs])) return pm.KroneckerNormal(name, mu=mu, covs=covs, sigma=sigma, size=size, **kwargs) - def _build_conditional(self, Xnew, pred_noise, diag): + def _build_conditional(self, Xnew, diag, pred_noise): Xs, y, sigma = self.Xs, self.y, self.sigma # Old points @@ -1170,7 +1168,9 @@ def _build_conditional(self, Xnew, pred_noise, diag): eigs += sigma ** 2 # New points + print('diag', diag) Km = self.cov_func(Xnew, diag=diag) + print('Km', Km.eval()) Knm = self.cov_func(X, Xnew) Kmn = Knm.T @@ -1195,7 +1195,7 @@ def _build_conditional(self, Xnew, pred_noise, diag): cov += sigma * at.identity_like(cov) return mu, cov - def conditional(self, name, Xnew, pred_noise=False, **kwargs): + def conditional(self, name, Xnew, pred_noise=False, diag=False, **kwargs): """ Returns the conditional distribution evaluated over new input locations `Xnew`, just as in `Marginal`. @@ -1229,7 +1229,7 @@ def conditional(self, name, Xnew, pred_noise=False, **kwargs): Extra keyword arguments that are passed to `MvNormal` distribution constructor. """ - mu, cov = self._build_conditional(Xnew, pred_noise, False) + mu, cov = self._build_conditional(Xnew, diag, pred_noise) return pm.MvNormal(name, mu=mu, cov=cov, **kwargs) def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): @@ -1252,8 +1252,8 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): Whether or not observation noise is included in the conditional. Default is `False`. 
""" + mu, cov = self._predict_at(Xnew, diag, pred_noise) model = modelcontext(model) - mu, cov = self._predict_at(Xnew, pred_noise, diag) return replace_with_values(model, [mu, cov], replacements=point) def _predict_at(self, Xnew, diag=False, pred_noise=False): @@ -1273,5 +1273,5 @@ def _predict_at(self, Xnew, diag=False, pred_noise=False): Whether or not observation noise is included in the conditional. Default is `False`. """ - mu, cov = self._build_conditional(Xnew, pred_noise, diag) + mu, cov = self._build_conditional(Xnew, diag, pred_noise) return mu, cov diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 4dd9d9b7954..3414ed741c9 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -43,9 +43,15 @@ def replace_with_values(model, vars_needed, replacements=None): R""" Replace random variable nodes in the graph with values given by the replacements dict. Uses untransformed versions of the inputs, performs some basic input validation. - - NOTE TO REVIEWER: Modified this from `sample_posterior_predictive`. Is there a better way to do this? - + + Parameters + ---------- + model: Model + A PyMC model object + vars_needed: list of TensorVariables + A list of variable outputs + replacements: dict with string keys, numeric values + """ inputs, input_names = [], [] for rv in walk_model(vars_needed, walk_past_rvs=True): @@ -64,11 +70,14 @@ def replace_with_values(model, vars_needed, replacements=None): accept_inplace=True, on_unused_input="ignore", ) - + + # remove unneeded inputs replacements = {name: val for name, val in replacements.items() if name in input_names} - missing = set(vars_needed) - set(replacements.keys()) + missing = set(input_names) - set(replacements.keys()) + + # error if more inputs needed if len(missing) > 0: - missing_str = ", ".join({m.name for m in missing}) + missing_str = ", ".join(missing) raise ValueError(f"Values for {missing_str} must be included in `replacements`.") return fn(**replacements) @@ -115,7 +124,7 @@ def stabilize(K, jitter=JITTER_DEFAULT): return K + jitter * at.identity_like(K) -def kmeans_inducing_points(n_inducing, X): +def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): R""" Use the K-means algorithm to initialize the locations `X` for the inducing points `fu`. @@ -126,6 +135,8 @@ def kmeans_inducing_points(n_inducing, X): The number of inducing points (or k, the number of clusters) X: array-like Gaussian process input matrix. 
+ **kmeans_kwargs: + Extra keyword arguments that are passed to `scipy.cluster.vq.kmeans` """ # first whiten X if isinstance(X, TensorConstant): @@ -143,7 +154,11 @@ def kmeans_inducing_points(n_inducing, X): # if std of a column is very small (zero), don't normalize that column scaling[scaling <= 1e-6] = 1.0 Xw = X / scaling - Xu, distortion = kmeans(Xw, n_inducing) + + if "k_or_guess" in kmeans_kwargs: + warn.UserWarning("Use `n_inducing` to set the `k_or_guess` parameter instead.") + + Xu, distortion = kmeans(Xw, k_or_guess=n_inducing, **kmeans_kwargs) return Xu * scaling diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 22f22792aae..6de5243a7c0 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -688,6 +688,34 @@ def test_raises(self): with pytest.raises(TypeError): pm.gp.cov.ScaledCov(1, "str is not Covariance object", lambda x: x) + +class TestCircular: + def test_1d_tau1(self): + X = np.linspace(0, 1, 10)[:, None] + etalon = 0.600881 + with pm.Model(): + cov = pm.gp.cov.Circular(1, 1, tau=5) + K = cov(X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + K = cov(X, X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + # check diagonal + Kd = cov(X, diag=True).eval() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + + def test_1d_tau2(self): + X = np.linspace(0, 1, 10)[:, None] + etalon = 0.691239 + with pm.Model(): + cov = pm.gp.cov.Circular(1, 1, tau=4) + K = cov(X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + K = cov(X, X).eval() + npt.assert_allclose(K[0, 1], etalon, atol=1e-3) + # check diagonal + Kd = cov(X, diag=True).eval() + npt.assert_allclose(np.diag(K), Kd, atol=1e-5) + class TestHandleArgs: def test_handleargs(self): @@ -765,7 +793,7 @@ def test_raises3(self): with pm.Model() as model: with pytest.raises(ValueError): B = pm.gp.cov.Coregion(1) - + class TestMarginalVsLatent: R""" @@ -1155,17 +1183,18 @@ def setup_method(self): def testMarginalKronvsMarginalpredict(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - # f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) mu, cov = kron_gp.predict(self.Xnew) npt.assert_allclose(mu, self.mu, atol=1e-5, rtol=1e-2) npt.assert_allclose(cov, self.cov, atol=1e-5, rtol=1e-2) + with kron_model: + _, var = kron_gp.predict(self.Xnew, diag=True) + npt.assert_allclose(np.diag(cov), var, atol=1e-5, rtol=1e-2) def testMarginalKronvsMarginal(self): with pm.Model() as kron_model: kron_gp = pm.gp.MarginalKron(mean_func=self.mean, cov_funcs=self.cov_funcs) - # f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma, shape=self.N) f = kron_gp.marginal_likelihood("f", self.Xs, self.y, sigma=self.sigma) p = kron_gp.conditional("p", self.Xnew) kron_logp = kron_model.logp({"p": self.pnew}) @@ -1179,7 +1208,7 @@ def testMarginalKronRaises(self): gp1 + gp2 -class TestUtil: +class TestPlotGP: def test_plot_gp_dist(self): """Test that the plotting helper works with the stated input shapes.""" import matplotlib.pyplot as plt @@ -1206,31 +1235,57 @@ def test_plot_gp_dist_warn_nan(self): pm.gp.util.plot_gp_dist(ax, x=np.linspace(0, 50, X), samples=samples) plt.close() pass + - -class TestCircular: - def test_1d_tau1(self): - X = np.linspace(0, 1, 10)[:, None] - etalon = 0.600881 - with pm.Model(): - cov = pm.gp.cov.Circular(1, 1, tau=5) - K = cov(X).eval() - 
npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = cov(X, X).eval() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - # check diagonal - Kd = cov(X, diag=True).eval() - npt.assert_allclose(np.diag(K), Kd, atol=1e-5) - - def test_1d_tau2(self): - X = np.linspace(0, 1, 10)[:, None] - etalon = 0.691239 - with pm.Model(): - cov = pm.gp.cov.Circular(1, 1, tau=4) - K = cov(X).eval() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - K = cov(X, X).eval() - npt.assert_allclose(K[0, 1], etalon, atol=1e-3) - # check diagonal - Kd = cov(X, diag=True).eval() - npt.assert_allclose(np.diag(K), Kd, atol=1e-5) +class TestKmeansInducing: + def setup_method(self): + self.centers = (-5, 5) + self.x = np.concatenate( + ( + self.centers[0] + np.random.randn(500), + self.centers[1] + np.random.randn(500) + ) + ) + + def test_kmeans(self): + X = self.x[:, None] + Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() + npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) + + X = at.as_tensor_variable(self.x[:, None]) + Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() + npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) + + def test_kmeans_raises(self): + with pytest.raises(TypeError): + Xu = pm.gp.util.kmeans_inducing_points(2, "str is the wrong type").flatten() + + + +class TestReplaceWithValues: + def test_basic_replace(self): + with pm.Model() as model: + a = pm.Normal("a") + b = pm.Normal("b", mu=a) + c = a*b + + c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"a": 2, "b": 3, "x": 100}) + assert c_val == np.array(6.0) + + def test_replace_no_inputs_needed(self): + with pm.Model() as model: + a = at.as_tensor_variable(2.0) + b = 1.0 + a + c = a*b + + c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"x": 100}) + assert c_val == np.array(6.0) + + def test_missing_input(self): + with pm.Model() as model: + a = pm.Normal("a") + b = pm.Normal("b", mu=a) + c = a*b + + with pytest.raises(ValueError): + c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"a": 2, "x": 100}) \ No newline at end of file From 23fc5c7bb492eee3756f2710d6461c610b27da66 Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 3 Nov 2021 17:08:19 -0700 Subject: [PATCH 23/32] fix pre-commit issue --- pymc/gp/gp.py | 14 +++---------- pymc/gp/util.py | 31 ++++++++++++++++------------ pymc/tests/test_gp.py | 47 +++++++++++++++++++++---------------------- 3 files changed, 44 insertions(+), 48 deletions(-) diff --git a/pymc/gp/gp.py b/pymc/gp/gp.py index 4f762ecb664..b8a76519598 100644 --- a/pymc/gp/gp.py +++ b/pymc/gp/gp.py @@ -21,8 +21,6 @@ import pymc as pm -# Avoid circular dependency -from pymc.distributions.distribution import NoDistribution from pymc.gp.cov import Constant, Covariance from pymc.gp.mean import Zero from pymc.gp.util import ( @@ -36,7 +34,6 @@ stabilize, ) from pymc.math import cartesian, kron_diag, kron_dot, kron_solve_lower, kron_solve_upper -from pymc.model import modelcontext __all__ = ["Latent", "Marginal", "TP", "MarginalSparse", "LatentKron", "MarginalKron"] @@ -572,9 +569,8 @@ def predict( """ if given is None: given = {} - model = modelcontext(model) mu, cov = self._predict_at(Xnew, diag, pred_noise, given, jitter) - return replace_with_values(model, [mu, cov], replacements=point) + return replace_with_values([mu, cov], replacements=point, model=model) def _predict_at(self, Xnew, diag=False, pred_noise=False, given=None, jitter=0.0): R""" @@ -1109,7 +1105,7 @@ def _check_inputs(self, Xs, y): raise ValueError("Must provide a 
covariance function for each X") if N != len(y): raise ValueError( - f"Length of y ({len(y)}) must match length of cartesian product of Xs ({N})" + f"Length of y ({len(y)}) must match length of cartesian product of Xs ({N})" ) def marginal_likelihood(self, name, Xs, y, sigma, is_observed=True, **kwargs): @@ -1167,10 +1163,7 @@ def _build_conditional(self, Xnew, diag, pred_noise): if sigma is not None: eigs += sigma ** 2 - # New points - print('diag', diag) Km = self.cov_func(Xnew, diag=diag) - print('Km', Km.eval()) Knm = self.cov_func(X, Xnew) Kmn = Knm.T @@ -1253,8 +1246,7 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, model=None): Default is `False`. """ mu, cov = self._predict_at(Xnew, diag, pred_noise) - model = modelcontext(model) - return replace_with_values(model, [mu, cov], replacements=point) + return replace_with_values([mu, cov], replacements=point, model=model) def _predict_at(self, Xnew, diag=False, pred_noise=False): R""" diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 3414ed741c9..5ffd36b8318 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -14,8 +14,6 @@ import warnings -from typing import Dict - import aesara.tensor as at import numpy as np @@ -35,24 +33,31 @@ from pymc.aesaraf import compile_rv_inplace, walk_model +# Avoid circular dependency when importing modelcontext +from pymc.distributions.distribution import NoDistribution + +assert NoDistribution # keep both pylint and black happy +from pymc.model import modelcontext + JITTER_DEFAULT = 1e-6 -# TODO: add test -def replace_with_values(model, vars_needed, replacements=None): +def replace_with_values(vars_needed, replacements=None, model=None): R""" Replace random variable nodes in the graph with values given by the replacements dict. Uses untransformed versions of the inputs, performs some basic input validation. - + Parameters ---------- - model: Model - A PyMC model object vars_needed: list of TensorVariables A list of variable outputs replacements: dict with string keys, numeric values - + The variable name and values to be replaced in the model graph. 
+ model: Model + A PyMC model object """ + model = modelcontext(model) + inputs, input_names = [], [] for rv in walk_model(vars_needed, walk_past_rvs=True): if rv in model.named_vars.values() and not isinstance(rv, SharedVariable): @@ -70,11 +75,11 @@ def replace_with_values(model, vars_needed, replacements=None): accept_inplace=True, on_unused_input="ignore", ) - - # remove unneeded inputs + + # remove unneeded inputs replacements = {name: val for name, val in replacements.items() if name in input_names} missing = set(input_names) - set(replacements.keys()) - + # error if more inputs needed if len(missing) > 0: missing_str = ", ".join(missing) @@ -154,10 +159,10 @@ def kmeans_inducing_points(n_inducing, X, **kmeans_kwargs): # if std of a column is very small (zero), don't normalize that column scaling[scaling <= 1e-6] = 1.0 Xw = X / scaling - + if "k_or_guess" in kmeans_kwargs: warn.UserWarning("Use `n_inducing` to set the `k_or_guess` parameter instead.") - + Xu, distortion = kmeans(Xw, k_or_guess=n_inducing, **kmeans_kwargs) return Xu * scaling diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 6de5243a7c0..48ca02bd306 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -688,7 +688,7 @@ def test_raises(self): with pytest.raises(TypeError): pm.gp.cov.ScaledCov(1, "str is not Covariance object", lambda x: x) - + class TestCircular: def test_1d_tau1(self): X = np.linspace(0, 1, 10)[:, None] @@ -793,7 +793,7 @@ def test_raises3(self): with pm.Model() as model: with pytest.raises(ValueError): B = pm.gp.cov.Coregion(1) - + class TestMarginalVsLatent: R""" @@ -1235,57 +1235,56 @@ def test_plot_gp_dist_warn_nan(self): pm.gp.util.plot_gp_dist(ax, x=np.linspace(0, 50, X), samples=samples) plt.close() pass - + class TestKmeansInducing: def setup_method(self): self.centers = (-5, 5) self.x = np.concatenate( - ( - self.centers[0] + np.random.randn(500), - self.centers[1] + np.random.randn(500) - ) + (self.centers[0] + np.random.randn(500), self.centers[1] + np.random.randn(500)) ) - + def test_kmeans(self): X = self.x[:, None] Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) - + X = at.as_tensor_variable(self.x[:, None]) Xu = pm.gp.util.kmeans_inducing_points(2, X).flatten() npt.assert_allclose(np.asarray(self.centers), np.sort(Xu), atol=0.1) - + def test_kmeans_raises(self): with pytest.raises(TypeError): Xu = pm.gp.util.kmeans_inducing_points(2, "str is the wrong type").flatten() - - - + + class TestReplaceWithValues: def test_basic_replace(self): with pm.Model() as model: a = pm.Normal("a") b = pm.Normal("b", mu=a) - c = a*b - - c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"a": 2, "b": 3, "x": 100}) + c = a * b + + (c_val,) = pm.gp.util.replace_with_values( + [c], replacements={"a": 2, "b": 3, "x": 100}, model=model + ) assert c_val == np.array(6.0) - + def test_replace_no_inputs_needed(self): with pm.Model() as model: a = at.as_tensor_variable(2.0) b = 1.0 + a - c = a*b - - c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"x": 100}) + c = a * b + (c_val,) = pm.gp.util.replace_with_values([c], replacements={"x": 100}) assert c_val == np.array(6.0) - + def test_missing_input(self): with pm.Model() as model: a = pm.Normal("a") b = pm.Normal("b", mu=a) - c = a*b - + c = a * b + with pytest.raises(ValueError): - c_val, = pm.gp.util.replace_with_values(model, [c], replacements={"a": 2, "x": 100}) \ No newline at end of file + (c_val,) = 
pm.gp.util.replace_with_values( + [c], replacements={"a": 2, "x": 100}, model=model + ) From 158529f51743f20385ae618d050f6c2b75ce0e2a Mon Sep 17 00:00:00 2001 From: bwengals Date: Wed, 3 Nov 2021 17:14:12 -0700 Subject: [PATCH 24/32] fix precommit on cov.py --- pymc/gp/cov.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pymc/gp/cov.py b/pymc/gp/cov.py index f5dc1c9a13b..1a19edb2bbf 100644 --- a/pymc/gp/cov.py +++ b/pymc/gp/cov.py @@ -152,7 +152,9 @@ def __array_wrap__(self, result): elif isinstance(result[0][0], Prod): return result[0][0].factor_list[0] * A else: - raise TypeError(f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`.") + raise TypeError( + f"Unknown Covariance combination type {result[0][0]}. Known types are `Add` or `Prod`." + ) class Combination(Covariance): From 08a7652dac3a6886fba730b7160bd13722f485b8 Mon Sep 17 00:00:00 2001 From: bwengals Date: Sun, 7 Nov 2021 17:07:30 -0800 Subject: [PATCH 25/32] fix comment --- pymc/gp/util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pymc/gp/util.py b/pymc/gp/util.py index 5ffd36b8318..730e664eec4 100644 --- a/pymc/gp/util.py +++ b/pymc/gp/util.py @@ -64,7 +64,7 @@ def replace_with_values(vars_needed, replacements=None, model=None): inputs.append(rv) input_names.append(rv.name) - # then deterministic, no inputs are required, can eval and return + # Then it's deterministic, no inputs are required, can eval and return if len(inputs) == 0: return tuple(v.eval() for v in vars_needed) @@ -76,11 +76,11 @@ def replace_with_values(vars_needed, replacements=None, model=None): on_unused_input="ignore", ) - # remove unneeded inputs + # Remove unneeded inputs replacements = {name: val for name, val in replacements.items() if name in input_names} missing = set(input_names) - set(replacements.keys()) - # error if more inputs needed + # Error if more inputs are needed if len(missing) > 0: missing_str = ", ".join(missing) raise ValueError(f"Values for {missing_str} must be included in `replacements`.") From 196e7029e57f278b1d3b088446f1aecf813767e2 Mon Sep 17 00:00:00 2001 From: bwengals Date: Sun, 7 Nov 2021 17:43:33 -0800 Subject: [PATCH 26/32] dont force blas version in windows dev enviornment (roll back changes) --- conda-envs/windows-environment-dev-py38.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/conda-envs/windows-environment-dev-py38.yml b/conda-envs/windows-environment-dev-py38.yml index 9dda7976d14..2f143a72af0 100644 --- a/conda-envs/windows-environment-dev-py38.yml +++ b/conda-envs/windows-environment-dev-py38.yml @@ -11,8 +11,6 @@ dependencies: - cloudpickle - fastprogress>=0.2.0 - h5py -- mkl==2020.4 -- mkl-service==2.3.0 - numpy>=1.15.0 - pandas - pip From d02e32065f8be770a58f9299fe7cb40eb48bb414 Mon Sep 17 00:00:00 2001 From: bwengals Date: Mon, 8 Nov 2021 17:22:17 -0800 Subject: [PATCH 27/32] update release notes --- RELEASE-NOTES.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 6d58c0adca1..8555362ca83 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -45,8 +45,14 @@ All of the above apply to: - Changes to the BART implementation: - A BART variable can be combined with other random variables. The `inv_link` argument has been removed (see [4914](https://github.com/pymc-devs/pymc3/pull/4914)). - Moved BART to its own module (see [5058](https://github.com/pymc-devs/pymc3/pull/5058)). -- ... 
- +- Changes to the Gaussian Process (GP) submodule: + - For all implementations, `gp.Latent`, `gp.Marginal` etc., `cov_func` and `mean_func` are required kwargs. + - In Windows test conda environment the `mkl` version is fixed to verison 2020.4, and `mkl-service` is fixed to `2.3.0`. This was required for `gp.MarginalKron` to function properly. + - `gp.MvStudentT` uses rotated samples from `StudentT` directly now, instead of sampling from `pm.Chi2` and then from `pm.Normal`. + - The "jitter" parameter, or the diagonal noise term added to Gram matrices such that the Cholesky is numerically stable, is now exposed to the user instead of hard-coded. See the function `gp.util.stabilize`. + - The `is_observed` arguement for `gp.Marginal*` implementations has been deprecated. + - In the gp.utils file, the `kmeans_inducing_points` function now passes through `kmeans_kwargs` to scipy's k-means function. + - The function `replace_with_values` function has been added to `gp.utils`. ### Expected breaks + New API was already available in `v3`. From 8fee214a1e8ba51cc0109b767b8ee9aee8e42f79 Mon Sep 17 00:00:00 2001 From: bwengals Date: Mon, 8 Nov 2021 17:23:03 -0800 Subject: [PATCH 28/32] add removed ... line from release notes --- RELEASE-NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 8555362ca83..35c4f64e73a 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -53,6 +53,7 @@ All of the above apply to: - The `is_observed` arguement for `gp.Marginal*` implementations has been deprecated. - In the gp.utils file, the `kmeans_inducing_points` function now passes through `kmeans_kwargs` to scipy's k-means function. - The function `replace_with_values` function has been added to `gp.utils`. + - ... ### Expected breaks + New API was already available in `v3`. From d5474a7d7cb96a363a4f4cc375a8f98bd3ebb743 Mon Sep 17 00:00:00 2001 From: bwengals Date: Fri, 12 Nov 2021 16:35:28 -0800 Subject: [PATCH 29/32] add link to PR --- RELEASE-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 35c4f64e73a..ac58184d6a6 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -45,7 +45,7 @@ All of the above apply to: - Changes to the BART implementation: - A BART variable can be combined with other random variables. The `inv_link` argument has been removed (see [4914](https://github.com/pymc-devs/pymc3/pull/4914)). - Moved BART to its own module (see [5058](https://github.com/pymc-devs/pymc3/pull/5058)). -- Changes to the Gaussian Process (GP) submodule: +- Changes to the Gaussian Process (GP) submodule (see [5055](https://github.com/pymc-devs/pymc/pull/5055)): - For all implementations, `gp.Latent`, `gp.Marginal` etc., `cov_func` and `mean_func` are required kwargs. - In Windows test conda environment the `mkl` version is fixed to verison 2020.4, and `mkl-service` is fixed to `2.3.0`. This was required for `gp.MarginalKron` to function properly. - `gp.MvStudentT` uses rotated samples from `StudentT` directly now, instead of sampling from `pm.Chi2` and then from `pm.Normal`. 
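As a quick illustration of the first bullet above (an editor's sketch rather than code taken from this patch series; the kernel choice, lengthscale prior, and data are placeholders), the GP constructors now take `mean_func` and `cov_func` by keyword only:

import numpy as np
import pymc as pm

X = np.linspace(0, 10, 50)[:, None]
y = np.sin(X.flatten()) + 0.1 * np.random.randn(50)

with pm.Model() as model:
    ell = pm.Gamma("ell", alpha=2, beta=1)
    cov_func = pm.gp.cov.ExpQuad(1, ls=ell)
    mean_func = pm.gp.mean.Constant(0.0)

    # positional arguments, e.g. pm.gp.Marginal(mean_func, cov_func), now raise a TypeError
    gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func)
    f = gp.marginal_likelihood("f", X=X, y=y, noise=0.1)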
From 53485759aab677ea112801bde8eb0251a7d9b8ba Mon Sep 17 00:00:00 2001 From: bwengals Date: Sat, 20 Nov 2021 14:16:47 -0800 Subject: [PATCH 30/32] remove is_observed usage from TestMarginalVsLatent --- pymc/tests/test_gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index 48ca02bd306..a4dae696848 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -809,7 +809,7 @@ def setup_method(self): cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.Marginal(mean_func=mean_func, cov_func=cov_func) - f = gp.marginal_likelihood("f", X, y, noise=0.0, is_observed=False, observed=y) + f = gp.marginal_likelihood("f", X, y, noise=0.0) p = gp.conditional("p", Xnew) self.logp = model.logp({"p": pnew}) self.X = X From c2b850d0097312cd7518645a7694ac0fa333f42e Mon Sep 17 00:00:00 2001 From: bwengals Date: Sat, 20 Nov 2021 14:26:23 -0800 Subject: [PATCH 31/32] remove is_observed usage from TestMarginalVsMarginalSparse --- pymc/tests/test_gp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pymc/tests/test_gp.py b/pymc/tests/test_gp.py index a4dae696848..7ae9b23c866 100644 --- a/pymc/tests/test_gp.py +++ b/pymc/tests/test_gp.py @@ -894,7 +894,7 @@ def testPredictCov(self): cov_func = pm.gp.cov.ExpQuad(3, [0.1, 0.2, 0.3]) mean_func = pm.gp.mean.Constant(0.5) gp = pm.gp.MarginalSparse(mean_func=mean_func, cov_func=cov_func, approx="DTC") - f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma, is_observed=False) + f = gp.marginal_likelihood("f", self.X, self.X, self.y, self.sigma) mu1, cov1 = self.gp.predict(self.Xnew, pred_noise=True) mu2, cov2 = gp.predict(self.Xnew, pred_noise=True) npt.assert_allclose(mu1, mu2, atol=0, rtol=1e-3) From 461d26bc1828f19100b1b038d6b83c1f4d125989 Mon Sep 17 00:00:00 2001 From: Thomas Wiecki Date: Sun, 21 Nov 2021 09:02:29 +0100 Subject: [PATCH 32/32] Update RELEASE-NOTES.md --- RELEASE-NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index ae3d190cf91..f3035fe825e 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -53,7 +53,7 @@ All of the above apply to: - The `is_observed` arguement for `gp.Marginal*` implementations has been deprecated. - In the gp.utils file, the `kmeans_inducing_points` function now passes through `kmeans_kwargs` to scipy's k-means function. - The function `replace_with_values` function has been added to `gp.utils`. - - ... +- ... ### Expected breaks + New API was already available in `v3`.
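For completeness, a minimal end-to-end sketch of the workflow these final patches settle on: `marginal_likelihood` without the deprecated `is_observed` flag, and prediction through `predict`, which now evaluates its outputs with `gp.util.replace_with_values` given a `point` dictionary. This is an editor's illustration only; the data, priors, and the `point` entries are invented for the example.

import numpy as np
import pymc as pm

X = np.linspace(0, 1, 30)[:, None]
Xnew = np.linspace(-0.2, 1.2, 10)[:, None]
y = np.random.randn(30)

with pm.Model() as model:
    ell = pm.Gamma("ell", alpha=2, beta=1)
    cov_func = pm.gp.cov.ExpQuad(1, ls=ell)
    gp = pm.gp.Marginal(mean_func=pm.gp.mean.Zero(), cov_func=cov_func)
    f = gp.marginal_likelihood("f", X=X, y=y, noise=0.1)  # no is_observed argument

    # the free hyperparameter `ell` is filled in from `point` via replace_with_values
    mu, var = gp.predict(Xnew, point={"ell": 0.5}, diag=True)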