Updates for Theano-PyMC 1.1.0 #4405

Merged: 7 commits, merged Jan 19, 2021
5 changes: 3 additions & 2 deletions docs/source/Advanced_usage_of_Theano_in_PyMC3.rst
@@ -158,7 +158,7 @@ We can now use `scipy.optimize.newton` to find the root::
def mu_from_theta(theta):
return optimize.newton(func, 1, fprime=jac, args=(theta,))

We could wrap `mu_from_theta` with `tt.as_op` and use gradient-free
We could wrap `mu_from_theta` with `theano.compile.ops.as_op` and use gradient-free
methods like Metropolis, but to get NUTS and ADVI working, we also
need to define the derivative of `mu_from_theta`. We can find this
derivative using the implicit function theorem, or equivalently we
@@ -186,8 +186,9 @@ Now, we use this to define a theano op, that also computes the gradient::
import theano
import theano.tensor as tt
import theano.tests.unittest_tools
from theano.graph.op import Op

class MuFromTheta(tt.Op):
class MuFromTheta(Op):
itypes = [tt.dscalar]
otypes = [tt.dscalar]

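
For readers following the doc change above, a self-contained sketch of the updated pattern is shown below. It uses a toy root-finding problem (mu**3 = theta) rather than the equation in the actual document, but the structure matches what the doc describes: `Op` imported from `theano.graph.op`, `itypes`/`otypes`, a numerical `perform`, and a `grad` from the implicit function theorem::

    import numpy as np
    import theano
    import theano.tensor as tt
    from scipy import optimize
    from theano.graph.op import Op  # new home of Op in Theano-PyMC 1.1.0

    def mu_from_theta(theta):
        # Toy stand-in for the doc's root finder: solve mu**3 - theta = 0.
        return optimize.newton(lambda mu: mu ** 3 - theta, 1.0)

    class MuFromTheta(Op):
        itypes = [tt.dscalar]
        otypes = [tt.dscalar]

        def perform(self, node, inputs, outputs):
            (theta,) = inputs
            outputs[0][0] = np.asarray(mu_from_theta(theta))

        def grad(self, inputs, grads):
            # Implicit function theorem for mu**3 = theta: d mu/d theta = 1 / (3 mu**2)
            (theta,) = inputs
            mu = self(theta)
            return [grads[0] / (3 * mu ** 2)]

    theta = tt.dscalar("theta")
    mu = MuFromTheta()(theta)
    f = theano.function([theta], [mu, theano.grad(mu, theta)])
    print(f(8.0))  # mu = 2.0, d mu/d theta = 1/12
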
6 changes: 4 additions & 2 deletions pymc3/data.py
@@ -26,6 +26,8 @@
import theano
import theano.tensor as tt

from theano.graph.basic import Apply

import pymc3 as pm

__all__ = [
@@ -294,7 +296,7 @@ class Minibatch(tt.TensorVariable):

RNG = collections.defaultdict(list) # type: Dict[str, List[Any]]

@theano.configparser.change_flags(compute_test_value="raise")
@theano.config.change_flags(compute_test_value="raise")
def __init__(
self,
data,
@@ -320,7 +322,7 @@ def __init__(
minibatch = tt.patternbroadcast(minibatch, broadcastable)
self.minibatch = minibatch
super().__init__(self.minibatch.type, None, None, name=name)
theano.Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self])
Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self])
self.tag.test_value = copy(self.minibatch.tag.test_value)

def rslice(self, total, size, seed):
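
As a quick illustration of the renamed config helper used above (a minimal sketch, not code from this PR): `change_flags` now lives on `theano.config` rather than `theano.configparser`, and still works both as a decorator, as in `Minibatch.__init__`, and as a context manager:

    import theano
    import theano.tensor as tt

    # Flag override as a context manager; the same call can decorate a
    # function, which is how Minibatch.__init__ uses it above.
    with theano.config.change_flags(compute_test_value="raise"):
        x = tt.dscalar("x")
        x.tag.test_value = 0.0   # required while compute_test_value="raise"
        y = tt.exp(x)            # test values propagate eagerly
        print(y.tag.test_value)  # 1.0
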
6 changes: 4 additions & 2 deletions pymc3/distributions/dist_math.py
@@ -27,6 +27,8 @@

from theano import scan
from theano.compile.builders import OpFromGraph
from theano.graph.basic import Apply
from theano.graph.op import Op
from theano.scalar import UnaryScalarOp, upgrade_to_float_no_complex
from theano.scan import until
from theano.tensor.slinalg import Cholesky
@@ -312,7 +314,7 @@ def dlogp(inputs, gradients):
return OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)


class SplineWrapper(theano.Op):
class SplineWrapper(Op):
"""
Creates a theano operation from scipy.interpolate.UnivariateSpline
"""
@@ -324,7 +326,7 @@ def __init__(self, spline):

def make_node(self, x):
x = tt.as_tensor_variable(x)
return tt.Apply(self, [x], [x.type()])
return Apply(self, [x], [x.type()])

@property
def grad_op(self):
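
For reference, a cut-down, runnable version of the `SplineWrapper` pattern under the new import locations is sketched below (evaluation only; the real class also exposes a `grad_op`). The spline data is made up for illustration:

    import numpy as np
    import theano
    import theano.tensor as tt
    from scipy.interpolate import UnivariateSpline
    from theano.graph.basic import Apply
    from theano.graph.op import Op

    class SplineWrapper(Op):
        """Wrap a scipy spline as a Theano op (no gradient in this sketch)."""

        def __init__(self, spline):
            self.spline = spline

        def make_node(self, x):
            # Apply now comes from theano.graph.basic instead of tt.Apply.
            x = tt.as_tensor_variable(x)
            return Apply(self, [x], [x.type()])

        def perform(self, node, inputs, output_storage):
            (x,) = inputs
            output_storage[0][0] = np.asarray(self.spline(x))

    xs = np.linspace(0.0, 10.0, 50)
    spline = UnivariateSpline(xs, np.sin(xs), s=0)
    x = tt.dscalar("x")
    f = theano.function([x], SplineWrapper(spline)(x))
    print(f(1.0))  # roughly sin(1.0) ~ 0.84
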
7 changes: 4 additions & 3 deletions pymc3/distributions/distribution.py
@@ -29,6 +29,7 @@

import numpy as np
import theano
import theano.graph.basic
import theano.tensor as tt

from theano import function
@@ -790,7 +791,7 @@ def draw_values(params, point=None, size=None):
value = _draw_value(next_, point=point, givens=temp_givens, size=size)
givens[next_.name] = (next_, value)
drawn[(next_, size)] = value
except theano.gof.fg.MissingInputError:
except theano.graph.fg.MissingInputError:
# The node failed, so we must add the node's parents to
# the stack of nodes to try to draw from. We exclude the
# nodes in the `params` list.
@@ -833,7 +834,7 @@ def draw_values(params, point=None, size=None):
value = _draw_value(param, point=point, givens=givens.values(), size=size)
evaluated[param_idx] = drawn[(param, size)] = value
givens[param.name] = (param, value)
except theano.gof.fg.MissingInputError:
except theano.graph.fg.MissingInputError:
missing_inputs.add(param_idx)

return [evaluated[j] for j in params] # set the order back
@@ -994,7 +995,7 @@ def _draw_value(param, point=None, givens=None, size=None):
variables = values = []
# We only truly care if the ancestors of param that were given
# value have the matching dshape and val.shape
param_ancestors = set(theano.gof.graph.ancestors([param], blockers=list(variables)))
param_ancestors = set(theano.graph.basic.ancestors([param], blockers=list(variables)))
inputs = [(var, val) for var, val in zip(variables, values) if var in param_ancestors]
if inputs:
input_vars, input_vals = list(zip(*inputs))
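
The only substantive change here is the new module path for `MissingInputError`. A minimal sketch of the behaviour being caught (not PR code):

    import theano.graph.fg
    import theano.tensor as tt

    x = tt.dscalar("x")
    y = 2 * x

    try:
        # Evaluating y without supplying x raises MissingInputError, which
        # now lives in theano.graph.fg (formerly theano.gof.fg).
        y.eval()
    except theano.graph.fg.MissingInputError:
        print("x is a missing input, as expected")
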
2 changes: 1 addition & 1 deletion pymc3/distributions/mixture.py
@@ -253,7 +253,7 @@ def _comp_logp(self, value):
val_shape = tuple(value.shape.eval())
except AttributeError:
val_shape = value.shape
except theano.gof.MissingInputError:
except theano.graph.fg.MissingInputError:
val_shape = None
try:
self_shape = tuple(self.shape)
9 changes: 5 additions & 4 deletions pymc3/distributions/multivariate.py
@@ -23,8 +23,9 @@
import theano.tensor as tt

from scipy import linalg, stats
from theano.gof.op import get_test_value
from theano.gof.utils import TestValueError
from theano.graph.basic import Apply
from theano.graph.op import Op, get_test_value
from theano.graph.utils import TestValueError
from theano.tensor.nlinalg import det, eigh, matrix_inverse, trace
from theano.tensor.slinalg import Cholesky

@@ -835,7 +836,7 @@ def posdef(AA):
return 0


class PosDefMatrix(theano.Op):
class PosDefMatrix(Op):
"""
Check if input is positive definite. Input should be a square matrix.

@@ -850,7 +851,7 @@ def make_node(self, x):
x = tt.as_tensor_variable(x)
assert x.ndim == 2
o = tt.TensorType(dtype="int8", broadcastable=[])()
return theano.Apply(self, [x], [o])
return Apply(self, [x], [o])

# Python implementation:
def perform(self, node, inputs, outputs):
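
`get_test_value` and `TestValueError` also moved in this file. A small, made-up sketch of how they behave under the new paths (not part of the PR; exact validation details can depend on the compute_test_value flag):

    import theano.tensor as tt
    from theano.graph.op import get_test_value
    from theano.graph.utils import TestValueError

    x = tt.dscalar("x")
    x.tag.test_value = 2.0

    print(get_test_value(x))  # 2.0

    y = tt.dscalar("y")       # no test value attached
    try:
        get_test_value(y)
    except TestValueError:
        print("y has no test value")
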
11 changes: 7 additions & 4 deletions pymc3/distributions/posterior_predictive.py
@@ -22,7 +22,8 @@
)

import numpy as np
import theano
import theano.graph.basic
import theano.graph.fg
import theano.tensor as tt

from arviz import InferenceData
@@ -422,7 +423,7 @@ def draw_values(self) -> List[np.ndarray]:
assert isinstance(value, np.ndarray)
givens[next_.name] = (next_, value)
drawn[(next_, samples)] = value
except theano.gof.fg.MissingInputError:
except theano.graph.fg.MissingInputError:
# The node failed, so we must add the node's parents to
# the stack of nodes to try to draw from. We exclude the
# nodes in the `params` list.
@@ -467,7 +468,7 @@ def draw_values(self) -> List[np.ndarray]:
assert isinstance(value, np.ndarray)
self.evaluated[param_idx] = drawn[(param, samples)] = value
givens[param.name] = (param, value)
except theano.gof.fg.MissingInputError:
except theano.graph.fg.MissingInputError:
missing_inputs.add(param_idx)
return [self.evaluated[j] for j in params]

@@ -661,7 +662,9 @@ def random_sample(
variables = values = []
# We only truly care if the ancestors of param that were given
# value have the matching dshape and val.shape
param_ancestors = set(theano.gof.graph.ancestors([param], blockers=list(variables)))
param_ancestors = set(
theano.graph.basic.ancestors([param], blockers=list(variables))
)
inputs = [
(var, val) for var, val in zip(variables, values) if var in param_ancestors
]
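
`ancestors` moved from `theano.gof.graph` to `theano.graph.basic` with the same semantics. A toy sketch of the blockers behaviour relied on above (not PR code):

    import theano.graph.basic
    import theano.tensor as tt

    a = tt.dscalar("a")
    b = tt.dscalar("b")
    c = a + b
    d = 2 * c

    # blockers stop the upward walk: c is reported, but its own inputs are not.
    anc = set(theano.graph.basic.ancestors([d], blockers=[c]))
    print(c in anc, a in anc)  # True False
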
9 changes: 5 additions & 4 deletions pymc3/math.py
@@ -25,7 +25,8 @@
import theano.tensor.slinalg # pylint: disable=unused-import

from scipy.linalg import block_diag as scipy_block_diag
from theano.gof import Apply, Op
from theano.graph.basic import Apply
from theano.graph.op import Op

# pylint: disable=unused-import
from theano.tensor import (
@@ -340,7 +341,7 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
return tt.set_subtensor(out[idxs], packed)


class BatchedDiag(tt.Op):
class BatchedDiag(Op):
"""
Fast BatchedDiag allocation
"""
@@ -352,7 +353,7 @@ def make_node(self, diag):
if diag.type.ndim != 2:
raise TypeError("data argument must be a matrix", diag.type)

return tt.Apply(self, [diag], [tt.tensor3(dtype=diag.dtype)])
return Apply(self, [diag], [tt.tensor3(dtype=diag.dtype)])

def perform(self, node, ins, outs, params=None):
(C,) = ins
@@ -408,7 +409,7 @@ def make_node(self, *matrices):
out_type = theano.sparse.matrix(self.format, dtype=largest_common_dtype(matrices))
else:
out_type = theano.tensor.matrix(dtype=largest_common_dtype(matrices))
return tt.Apply(self, matrices, [out_type])
return Apply(self, matrices, [out_type])

def perform(self, node, inputs, output_storage, params=None):
dtype = largest_common_dtype(inputs)
12 changes: 7 additions & 5 deletions pymc3/model.py
@@ -23,11 +23,13 @@
import numpy as np
import scipy.sparse as sps
import theano
import theano.graph.basic
import theano.sparse as sparse
import theano.tensor as tt

from pandas import Series
from theano.compile import SharedVariable
from theano.graph.basic import Apply
from theano.tensor.var import TensorVariable

import pymc3 as pm
@@ -283,7 +285,7 @@ def __enter__(self):
# self._theano_config is set in Model.__new__
self._config_context = None
if hasattr(self, "_theano_config"):
self._config_context = theano.change_flags(**self._theano_config)
self._config_context = theano.config.change_flags(**self._theano_config)
self._config_context.__enter__()
return self

@@ -1703,7 +1705,7 @@ def pandas_to_array(data):
ret = data
else: # empty mask
ret = data.filled()
elif isinstance(data, theano.gof.graph.Variable):
elif isinstance(data, theano.graph.basic.Variable):
ret = data
elif sps.issparse(data):
ret = data
@@ -1794,7 +1796,7 @@ def __init__(

if type is None:
data = pandas_to_array(data)
if isinstance(data, theano.gof.graph.Variable):
if isinstance(data, theano.graph.basic.Variable):
type = data.type
else:
type = TensorType(distribution.dtype, data.shape)
@@ -1817,7 +1819,7 @@ def __init__(
self.distribution = distribution

# make this RV a view on the combined missing/nonmissing array
theano.gof.Apply(theano.compile.view_op, inputs=[data], outputs=[self])
Apply(theano.compile.view_op, inputs=[data], outputs=[self])
self.tag.test_value = theano.compile.view_op(data).tag.test_value.astype(self.dtype)
self.scaling = _get_scaling(total_size, data.shape, data.ndim)

@@ -1997,7 +1999,7 @@ def __init__(

normalRV = transform.backward(self.transformed)

theano.Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self])
Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self])
self.tag.test_value = normalRV.tag.test_value
self.scaling = _get_scaling(total_size, self.shape, self.ndim)
incorporate_methods(
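
Three renames land in this file: the flag helper (`theano.config.change_flags`), the base `Variable` class checked by `pandas_to_array` (`theano.graph.basic.Variable`), and `Apply` for the view-op wiring. A stripped-down, hypothetical version of the `pandas_to_array` type check shows the new isinstance target:

    import numpy as np
    import theano.graph.basic
    import theano.tensor as tt

    def pandas_to_array_sketch(data):
        # Symbolic variables (theano.graph.basic.Variable, formerly
        # theano.gof.graph.Variable) pass through untouched; everything
        # else is coerced to a plain numpy array.
        if isinstance(data, theano.graph.basic.Variable):
            return data
        return np.asarray(data)

    print(pandas_to_array_sketch(tt.dscalar("x")))  # x (stays symbolic)
    print(pandas_to_array_sketch([1, 2, 3]))        # [1 2 3]
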
4 changes: 2 additions & 2 deletions pymc3/model_graph.py
@@ -18,7 +18,7 @@
VarName = str

from theano.compile import SharedVariable
from theano.gof.graph import stack_search
from theano.graph.basic import walk
from theano.tensor import Tensor

import pymc3 as pm
@@ -69,7 +69,7 @@ def _expand(node) -> Optional[Iterator[Tensor]]:
else:
return None

stack_search(start=deque([func]), expand=_expand, mode="bfs")
list(walk(deque([func]), _expand, bfs=True))
return retval

def _filter_parents(self, var, parents) -> Set[VarName]:
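
`stack_search` no longer exists in Theano-PyMC 1.1.0; the replacement wraps `theano.graph.basic.walk`, which is a generator, so the result is forced with `list()`. A self-contained sketch of the same kind of traversal (not PR code):

    from collections import deque

    import theano.tensor as tt
    from theano.graph.basic import walk

    a = tt.dscalar("a")
    b = tt.dscalar("b")
    c = (a + b) * 2

    def _expand(var):
        # Expand a variable to its parent variables, mirroring the callback
        # that used to be handed to stack_search.
        if var.owner is not None:
            return var.owner.inputs
        return []

    # walk() yields nodes lazily; list() forces the full breadth-first pass,
    # which is all the old stack_search call did for its side effects.
    visited = list(walk(deque([c]), _expand, bfs=True))
    print(a in visited and b in visited)  # True
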
9 changes: 5 additions & 4 deletions pymc3/ode/ode.py
@@ -19,7 +19,8 @@
import theano
import theano.tensor as tt

from theano.gof.op import get_test_value
from theano.graph.basic import Apply
from theano.graph.op import Op, get_test_value

from pymc3.exceptions import DtypeError, ShapeError
from pymc3.ode import utils
@@ -28,7 +29,7 @@
floatX = theano.config.floatX


class DifferentialEquation(theano.Op):
class DifferentialEquation(Op):
r"""
Specify an ordinary differential equation

@@ -141,7 +142,7 @@ def make_node(self, y0, theta):

# store symbolic output in dictionary such that it can be accessed in the grad method
self._output_sensitivities[hash(inputs)] = sens
return theano.Apply(self, inputs, (states, sens))
return Apply(self, inputs, (states, sens))

def __call__(self, y0, theta, return_sens=False, **kwargs):
if isinstance(y0, (list, tuple)) and not len(y0) == self.n_states:
@@ -162,7 +163,7 @@ def __call__(self, y0, theta, return_sens=False, **kwargs):
)

# use default implementation to prepare symbolic outputs (via make_node)
states, sens = super(theano.Op, self).__call__(y0, theta, **kwargs)
states, sens = super().__call__(y0, theta, **kwargs)

if theano.config.compute_test_value != "off":
# compute test values from input test values
6 changes: 3 additions & 3 deletions pymc3/sampling_jax.py
@@ -11,7 +11,7 @@
import jax
import numpy as np
import pandas as pd
import theano
import theano.graph.fg

from theano.link.jax.jax_dispatch import jax_funcify

@@ -45,7 +45,7 @@ def sample_tfp_nuts(

seed = jax.random.PRNGKey(random_seed)

fgraph = theano.gof.FunctionGraph(model.free_RVs, [model.logpt])
fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
fns = jax_funcify(fgraph)
logp_fn_jax = fns[0]

@@ -130,7 +130,7 @@ def sample_numpyro_nuts(

seed = jax.random.PRNGKey(random_seed)

fgraph = theano.gof.FunctionGraph(model.free_RVs, [model.logpt])
fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
fns = jax_funcify(fgraph)
logp_fn_jax = fns[0]

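
`FunctionGraph` is now imported from `theano.graph.fg` instead of `theano.gof`. A minimal sketch of the compile-to-JAX path used by both samplers above (assumes JAX is installed; the toy graph is made up):

    import numpy as np
    import theano.graph.fg
    import theano.tensor as tt

    from theano.link.jax.jax_dispatch import jax_funcify

    x = tt.dvector("x")
    y = tt.exp(x).sum()

    # Same pattern as sample_tfp_nuts / sample_numpyro_nuts: build a
    # FunctionGraph, then ask for JAX-callable functions (one per output).
    fgraph = theano.graph.fg.FunctionGraph([x], [y])
    logp_fn_jax = jax_funcify(fgraph)[0]

    print(logp_fn_jax(np.zeros(3)))  # 3.0
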
4 changes: 2 additions & 2 deletions pymc3/step_methods/gibbs.py
@@ -31,7 +31,7 @@
searchsorted,
)
from numpy.random import uniform
from theano.gof.graph import inputs
from theano.graph.basic import graph_inputs
from theano.tensor import add

from pymc3.distributions.discrete import Categorical
@@ -80,7 +80,7 @@ def competence(var, has_grad):


def elemwise_logp(model, var):
terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([v.logpt])]
terms = [v.logp_elemwiset for v in model.basic_RVs if var in graph_inputs([v.logpt])]
return model.fn(add(*terms))


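
`inputs` from `theano.gof.graph` is now `graph_inputs` in `theano.graph.basic`, with the same meaning: the root variables an expression depends on. A small sketch (not PR code):

    import theano.tensor as tt
    from theano.graph.basic import graph_inputs

    x = tt.dscalar("x")
    y = tt.dscalar("y")
    z = x + 2 * y

    # graph_inputs returns the owner-less variables feeding z
    # (constants such as the literal 2 are included as well).
    roots = list(graph_inputs([z]))
    print(x in roots, y in roots)  # True True
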