diff --git a/docs/source/Advanced_usage_of_Theano_in_PyMC3.rst b/docs/source/Advanced_usage_of_Theano_in_PyMC3.rst
index ce076427b28..ba0df7cad46 100644
--- a/docs/source/Advanced_usage_of_Theano_in_PyMC3.rst
+++ b/docs/source/Advanced_usage_of_Theano_in_PyMC3.rst
@@ -158,7 +158,7 @@ We can now use `scipy.optimize.newton` to find the root::
     def mu_from_theta(theta):
         return optimize.newton(func, 1, fprime=jac, args=(theta,))
 
-We could wrap `mu_from_theta` with `tt.as_op` and use gradient-free
+We could wrap `mu_from_theta` with `theano.compile.ops.as_op` and use gradient-free
 methods like Metropolis, but to get NUTS and ADVI working, we also
 need to define the derivative of `mu_from_theta`. We can find this
 derivative using the implicit function theorem, or equivalently we
@@ -186,8 +186,9 @@ Now, we use this to define a theano op, that also computes the gradient::
 
     import theano
     import theano.tensor as tt
     import theano.tests.unittest_tools
+    from theano.graph.op import Op
 
-    class MuFromTheta(tt.Op):
+    class MuFromTheta(Op):
         itypes = [tt.dscalar]
         otypes = [tt.dscalar]
diff --git a/pymc3/data.py b/pymc3/data.py
index 8c7092538c6..4cdb793aa33 100644
--- a/pymc3/data.py
+++ b/pymc3/data.py
@@ -26,6 +26,8 @@
 import theano
 import theano.tensor as tt
 
+from theano.graph.basic import Apply
+
 import pymc3 as pm
 
 __all__ = [
@@ -294,7 +296,7 @@ class Minibatch(tt.TensorVariable):
 
     RNG = collections.defaultdict(list)  # type: Dict[str, List[Any]]
 
-    @theano.configparser.change_flags(compute_test_value="raise")
+    @theano.config.change_flags(compute_test_value="raise")
    def __init__(
        self,
        data,
@@ -320,7 +322,7 @@ def __init__(
         minibatch = tt.patternbroadcast(minibatch, broadcastable)
         self.minibatch = minibatch
         super().__init__(self.minibatch.type, None, None, name=name)
-        theano.Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self])
+        Apply(theano.compile.view_op, inputs=[self.minibatch], outputs=[self])
         self.tag.test_value = copy(self.minibatch.tag.test_value)
 
     def rslice(self, total, size, seed):
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
index 12d58313305..70877722271 100644
--- a/pymc3/distributions/dist_math.py
+++ b/pymc3/distributions/dist_math.py
@@ -27,6 +27,8 @@
 
 from theano import scan
 from theano.compile.builders import OpFromGraph
+from theano.graph.basic import Apply
+from theano.graph.op import Op
 from theano.scalar import UnaryScalarOp, upgrade_to_float_no_complex
 from theano.scan import until
 from theano.tensor.slinalg import Cholesky
@@ -312,7 +314,7 @@ def dlogp(inputs, gradients):
     return OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)
 
 
-class SplineWrapper(theano.Op):
+class SplineWrapper(Op):
     """
     Creates a theano operation from scipy.interpolate.UnivariateSpline
     """
@@ -324,7 +326,7 @@ def __init__(self, spline):
 
     def make_node(self, x):
         x = tt.as_tensor_variable(x)
-        return tt.Apply(self, [x], [x.type()])
+        return Apply(self, [x], [x.type()])
 
     @property
     def grad_op(self):
diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py
index 2ab840c5362..8178ae0d228 100644
--- a/pymc3/distributions/distribution.py
+++ b/pymc3/distributions/distribution.py
@@ -29,6 +29,7 @@
 import numpy as np
 import theano
+import theano.graph.basic
 import theano.tensor as tt
 
 from theano import function
 
@@ -790,7 +791,7 @@ def draw_values(params, point=None, size=None):
                 value = _draw_value(next_, point=point, givens=temp_givens, size=size)
                 givens[next_.name] = (next_, value)
                 drawn[(next_, size)] = value
-            except theano.gof.fg.MissingInputError:
+            except theano.graph.fg.MissingInputError:
                 # The node failed, so we must add the node's parents to
                 # the stack of nodes to try to draw from. We exclude the
                 # nodes in the `params` list.
@@ -833,7 +834,7 @@ def draw_values(params, point=None, size=None):
                 value = _draw_value(param, point=point, givens=givens.values(), size=size)
                 evaluated[param_idx] = drawn[(param, size)] = value
                 givens[param.name] = (param, value)
-            except theano.gof.fg.MissingInputError:
+            except theano.graph.fg.MissingInputError:
                 missing_inputs.add(param_idx)
 
     return [evaluated[j] for j in params]  # set the order back
@@ -994,7 +995,7 @@ def _draw_value(param, point=None, givens=None, size=None):
             variables = values = []
         # We only truly care if the ancestors of param that were given
         # value have the matching dshape and val.shape
-        param_ancestors = set(theano.gof.graph.ancestors([param], blockers=list(variables)))
+        param_ancestors = set(theano.graph.basic.ancestors([param], blockers=list(variables)))
         inputs = [(var, val) for var, val in zip(variables, values) if var in param_ancestors]
         if inputs:
             input_vars, input_vals = list(zip(*inputs))
diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py
index 75471d18229..756269d3306 100644
--- a/pymc3/distributions/mixture.py
+++ b/pymc3/distributions/mixture.py
@@ -253,7 +253,7 @@ def _comp_logp(self, value):
                 val_shape = tuple(value.shape.eval())
             except AttributeError:
                 val_shape = value.shape
-            except theano.gof.MissingInputError:
+            except theano.graph.fg.MissingInputError:
                 val_shape = None
             try:
                 self_shape = tuple(self.shape)
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
index a1363879122..8e99ccee228 100755
--- a/pymc3/distributions/multivariate.py
+++ b/pymc3/distributions/multivariate.py
@@ -23,8 +23,9 @@
 
 import theano.tensor as tt
 
 from scipy import linalg, stats
-from theano.gof.op import get_test_value
-from theano.gof.utils import TestValueError
+from theano.graph.basic import Apply
+from theano.graph.op import Op, get_test_value
+from theano.graph.utils import TestValueError
 from theano.tensor.nlinalg import det, eigh, matrix_inverse, trace
 from theano.tensor.slinalg import Cholesky
@@ -835,7 +836,7 @@ def posdef(AA):
         return 0
 
 
-class PosDefMatrix(theano.Op):
+class PosDefMatrix(Op):
     """
     Check if input is positive definite.
     Input should be a square matrix.
@@ -850,7 +851,7 @@ def make_node(self, x):
         x = tt.as_tensor_variable(x)
         assert x.ndim == 2
         o = tt.TensorType(dtype="int8", broadcastable=[])()
-        return theano.Apply(self, [x], [o])
+        return Apply(self, [x], [o])
 
     # Python implementation:
     def perform(self, node, inputs, outputs):
diff --git a/pymc3/distributions/posterior_predictive.py b/pymc3/distributions/posterior_predictive.py
index 4d6c1f45966..47c4ce0fa0e 100644
--- a/pymc3/distributions/posterior_predictive.py
+++ b/pymc3/distributions/posterior_predictive.py
@@ -22,7 +22,8 @@
 )
 
 import numpy as np
-import theano
+import theano.graph.basic
+import theano.graph.fg
 import theano.tensor as tt
 
 from arviz import InferenceData
@@ -422,7 +423,7 @@ def draw_values(self) -> List[np.ndarray]:
                         assert isinstance(value, np.ndarray)
                         givens[next_.name] = (next_, value)
                         drawn[(next_, samples)] = value
-                    except theano.gof.fg.MissingInputError:
+                    except theano.graph.fg.MissingInputError:
                         # The node failed, so we must add the node's parents to
                         # the stack of nodes to try to draw from. We exclude the
                         # nodes in the `params` list.
@@ -467,7 +468,7 @@ def draw_values(self) -> List[np.ndarray]:
                     assert isinstance(value, np.ndarray)
                     self.evaluated[param_idx] = drawn[(param, samples)] = value
                     givens[param.name] = (param, value)
-                except theano.gof.fg.MissingInputError:
+                except theano.graph.fg.MissingInputError:
                     missing_inputs.add(param_idx)
 
         return [self.evaluated[j] for j in params]
@@ -661,7 +662,9 @@ def random_sample(
             variables = values = []
         # We only truly care if the ancestors of param that were given
         # value have the matching dshape and val.shape
-        param_ancestors = set(theano.gof.graph.ancestors([param], blockers=list(variables)))
+        param_ancestors = set(
+            theano.graph.basic.ancestors([param], blockers=list(variables))
+        )
         inputs = [
             (var, val) for var, val in zip(variables, values) if var in param_ancestors
         ]
diff --git a/pymc3/math.py b/pymc3/math.py
index 17a7a7f67d1..fc2a55823c2 100644
--- a/pymc3/math.py
+++ b/pymc3/math.py
@@ -25,7 +25,8 @@
 import theano.tensor.slinalg  # pylint: disable=unused-import
 
 from scipy.linalg import block_diag as scipy_block_diag
-from theano.gof import Apply, Op
+from theano.graph.basic import Apply
+from theano.graph.op import Op
 
 # pylint: disable=unused-import
 from theano.tensor import (
@@ -340,7 +341,7 @@ def expand_packed_triangular(n, packed, lower=True, diagonal_only=False):
         return tt.set_subtensor(out[idxs], packed)
 
 
-class BatchedDiag(tt.Op):
+class BatchedDiag(Op):
     """
     Fast BatchedDiag allocation
     """
@@ -352,7 +353,7 @@ def make_node(self, diag):
         if diag.type.ndim != 2:
             raise TypeError("data argument must be a matrix", diag.type)
 
-        return tt.Apply(self, [diag], [tt.tensor3(dtype=diag.dtype)])
+        return Apply(self, [diag], [tt.tensor3(dtype=diag.dtype)])
 
     def perform(self, node, ins, outs, params=None):
         (C,) = ins
@@ -408,7 +409,7 @@ def make_node(self, *matrices):
             out_type = theano.sparse.matrix(self.format, dtype=largest_common_dtype(matrices))
         else:
             out_type = theano.tensor.matrix(dtype=largest_common_dtype(matrices))
-        return tt.Apply(self, matrices, [out_type])
+        return Apply(self, matrices, [out_type])
 
     def perform(self, node, inputs, output_storage, params=None):
         dtype = largest_common_dtype(inputs)
diff --git a/pymc3/model.py b/pymc3/model.py
index 793bbefc455..393c4d2f6a2 100644
--- a/pymc3/model.py
+++ b/pymc3/model.py
@@ -23,11 +23,13 @@
 import numpy as np
 import scipy.sparse as sps
 import theano
+import theano.graph.basic
 import theano.sparse as sparse
 import theano.tensor as tt
 
 from pandas import Series
 from theano.compile import SharedVariable
+from theano.graph.basic import Apply
 from theano.tensor.var import TensorVariable
 
 import pymc3 as pm
@@ -283,7 +285,7 @@ def __enter__(self):
         # self._theano_config is set in Model.__new__
         self._config_context = None
         if hasattr(self, "_theano_config"):
-            self._config_context = theano.change_flags(**self._theano_config)
+            self._config_context = theano.config.change_flags(**self._theano_config)
             self._config_context.__enter__()
         return self
 
@@ -1703,7 +1705,7 @@ def pandas_to_array(data):
             ret = data
         else:  # empty mask
             ret = data.filled()
-    elif isinstance(data, theano.gof.graph.Variable):
+    elif isinstance(data, theano.graph.basic.Variable):
         ret = data
     elif sps.issparse(data):
         ret = data
@@ -1794,7 +1796,7 @@ def __init__(
 
         if type is None:
             data = pandas_to_array(data)
-            if isinstance(data, theano.gof.graph.Variable):
+            if isinstance(data, theano.graph.basic.Variable):
                 type = data.type
             else:
                 type = TensorType(distribution.dtype, data.shape)
@@ -1817,7 +1819,7 @@ def __init__(
         self.distribution = distribution
 
         # make this RV a view on the combined missing/nonmissing array
-        theano.gof.Apply(theano.compile.view_op, inputs=[data], outputs=[self])
+        Apply(theano.compile.view_op, inputs=[data], outputs=[self])
         self.tag.test_value = theano.compile.view_op(data).tag.test_value.astype(self.dtype)
         self.scaling = _get_scaling(total_size, data.shape, data.ndim)
 
@@ -1997,7 +1999,7 @@ def __init__(
 
             normalRV = transform.backward(self.transformed)
 
-            theano.Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self])
+            Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self])
             self.tag.test_value = normalRV.tag.test_value
             self.scaling = _get_scaling(total_size, self.shape, self.ndim)
             incorporate_methods(
diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py
index 3e977f25d45..cd3feb30709 100644
--- a/pymc3/model_graph.py
+++ b/pymc3/model_graph.py
@@ -18,7 +18,7 @@
 VarName = str
 
 from theano.compile import SharedVariable
-from theano.gof.graph import stack_search
+from theano.graph.basic import walk
 from theano.tensor import Tensor
 
 import pymc3 as pm
@@ -69,7 +69,7 @@ def _expand(node) -> Optional[Iterator[Tensor]]:
             else:
                 return None
 
-        stack_search(start=deque([func]), expand=_expand, mode="bfs")
+        list(walk(deque([func]), _expand, bfs=True))
         return retval
 
     def _filter_parents(self, var, parents) -> Set[VarName]:
diff --git a/pymc3/ode/ode.py b/pymc3/ode/ode.py
index e15f370ea8a..2eba398404f 100644
--- a/pymc3/ode/ode.py
+++ b/pymc3/ode/ode.py
@@ -19,7 +19,8 @@
 import theano
 import theano.tensor as tt
 
-from theano.gof.op import get_test_value
+from theano.graph.basic import Apply
+from theano.graph.op import Op, get_test_value
 
 from pymc3.exceptions import DtypeError, ShapeError
 from pymc3.ode import utils
@@ -28,7 +29,7 @@
 floatX = theano.config.floatX
 
 
-class DifferentialEquation(theano.Op):
+class DifferentialEquation(Op):
     r"""
     Specify an ordinary differential equation
 
@@ -141,7 +142,7 @@ def make_node(self, y0, theta):
 
         # store symbolic output in dictionary such that it can be accessed in the grad method
         self._output_sensitivities[hash(inputs)] = sens
-        return theano.Apply(self, inputs, (states, sens))
+        return Apply(self, inputs, (states, sens))
 
     def __call__(self, y0, theta, return_sens=False, **kwargs):
         if isinstance(y0, (list, tuple)) and not len(y0) == self.n_states:
@@ -162,7 +163,7 @@ def __call__(self, y0, theta, return_sens=False, **kwargs):
             )
 
         # use default implementation to prepare symbolic outputs (via make_node)
-        states, sens = super(theano.Op, self).__call__(y0, theta, **kwargs)
+        states, sens = super().__call__(y0, theta, **kwargs)
 
         if theano.config.compute_test_value != "off":
             # compute test values from input test values
diff --git a/pymc3/sampling_jax.py b/pymc3/sampling_jax.py
index 6e250059dbe..e02d21cb3cb 100644
--- a/pymc3/sampling_jax.py
+++ b/pymc3/sampling_jax.py
@@ -11,7 +11,7 @@
 import jax
 import numpy as np
 import pandas as pd
-import theano
+import theano.graph.fg
 
 from theano.link.jax.jax_dispatch import jax_funcify
 
@@ -45,7 +45,7 @@ def sample_tfp_nuts(
 
     seed = jax.random.PRNGKey(random_seed)
 
-    fgraph = theano.gof.FunctionGraph(model.free_RVs, [model.logpt])
+    fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
     fns = jax_funcify(fgraph)
     logp_fn_jax = fns[0]
 
@@ -130,7 +130,7 @@ def sample_numpyro_nuts(
 
     seed = jax.random.PRNGKey(random_seed)
 
-    fgraph = theano.gof.FunctionGraph(model.free_RVs, [model.logpt])
+    fgraph = theano.graph.fg.FunctionGraph(model.free_RVs, [model.logpt])
     fns = jax_funcify(fgraph)
     logp_fn_jax = fns[0]
 
diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py
index c1dbd38e71a..2646a8a9e82 100644
--- a/pymc3/step_methods/gibbs.py
+++ b/pymc3/step_methods/gibbs.py
@@ -31,7 +31,7 @@
     searchsorted,
 )
 from numpy.random import uniform
-from theano.gof.graph import inputs
+from theano.graph.basic import graph_inputs
 from theano.tensor import add
 
 from pymc3.distributions.discrete import Categorical
@@ -80,7 +80,7 @@ def competence(var, has_grad):
 
 
 def elemwise_logp(model, var):
-    terms = [v.logp_elemwiset for v in model.basic_RVs if var in inputs([v.logpt])]
+    terms = [v.logp_elemwiset for v in model.basic_RVs if var in graph_inputs([v.logpt])]
     return model.fn(add(*terms))
 
 
diff --git a/pymc3/tests/conftest.py b/pymc3/tests/conftest.py
index 29646256919..e9d38d163ff 100644
--- a/pymc3/tests/conftest.py
+++ b/pymc3/tests/conftest.py
@@ -21,14 +21,14 @@
 
 @pytest.fixture(scope="function", autouse=True)
 def theano_config():
-    config = theano.configparser.change_flags(compute_test_value="raise")
+    config = theano.config.change_flags(compute_test_value="raise")
     with config:
         yield
 
 
 @pytest.fixture(scope="function", autouse=True)
 def exception_verbosity():
-    config = theano.configparser.change_flags(exception_verbosity="high")
+    config = theano.config.change_flags(exception_verbosity="high")
     with config:
         yield
 
@@ -36,7 +36,7 @@ def exception_verbosity():
 @pytest.fixture(scope="function", autouse=False)
 def strict_float32():
     if theano.config.floatX == "float32":
-        config = theano.configparser.change_flags(warn_float64="raise")
+        config = theano.config.change_flags(warn_float64="raise")
         with config:
             yield
     else:
diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py
index 2cf7b8e98f4..de9bbd5b7e5 100644
--- a/pymc3/tests/test_dist_math.py
+++ b/pymc3/tests/test_dist_math.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 
 import numpy as np
 import numpy.testing as npt
@@ -167,7 +168,7 @@ def test_logp(self):
         logp = logp_f(cov_val, delta_val)
         npt.assert_allclose(logp, expect)
 
-    @theano.configparser.change_flags(compute_test_value="ignore")
+    @theano.config.change_flags(compute_test_value="ignore")
     def test_grad(self):
         np.random.seed(42)
 
@@ -190,7 +191,7 @@ def func(chol_vec, delta):
         verify_grad(func, [chol_vec_val, delta_val])
 
     @pytest.mark.skip(reason="Fix in theano not released yet: Theano#5908")
-    @theano.configparser.change_flags(compute_test_value="ignore")
+    @theano.config.change_flags(compute_test_value="ignore")
     def test_hessian(self):
         chol_vec = tt.vector("chol_vec")
         chol_vec.tag.test_value = np.array([0.1, 2, 3])
@@ -209,14 +210,14 @@
 
 
 class TestSplineWrapper:
-    @theano.configparser.change_flags(compute_test_value="ignore")
+    @theano.config.change_flags(compute_test_value="ignore")
     def test_grad(self):
         x = np.linspace(0, 1, 100)
         y = x * x
         spline = SplineWrapper(interpolate.InterpolatedUnivariateSpline(x, y, k=1))
         verify_grad(spline, [0.5])
 
-    @theano.configparser.change_flags(compute_test_value="ignore")
+    @theano.config.change_flags(compute_test_value="ignore")
     def test_hessian(self):
         x = np.linspace(0, 1, 100)
         y = x * x
@@ -228,7 +229,7 @@
 
 
 class TestI0e:
-    @theano.configparser.change_flags(compute_test_value="ignore")
+    @theano.config.change_flags(compute_test_value="ignore")
     def test_grad(self):
         verify_grad(i0e, [0.5])
         verify_grad(i0e, [-2.0])
@@ -236,7 +237,12 @@ def test_grad(self):
         verify_grad(i0e, [[[0.5, -2.0]]])
 
 
-@pytest.mark.parametrize("dtype", ["float16", "float32", "float64", "float128"])
+@pytest.mark.parametrize(
+    "dtype",
+    ["float16", "float32", "float64", "float128"]
+    if sys.platform != "win32"
+    else ["float16", "float32", "float64"],
+)
 def test_clipped_beta_rvs(dtype):
     # Verify that the samples drawn from the beta distribution are never
     # equal to zero or one (issue #3898)
diff --git a/pymc3/tests/test_math.py b/pymc3/tests/test_math.py
index 0f048850b17..74a782555ed 100644
--- a/pymc3/tests/test_math.py
+++ b/pymc3/tests/test_math.py
@@ -160,7 +160,7 @@ def setup_method(self):
         self.op_class = LogDet
         self.op = logdet
 
-    @theano.configparser.change_flags(compute_test_value="ignore")
+    @theano.config.change_flags(compute_test_value="ignore")
     def validate(self, input_mat):
         x = theano.tensor.matrix()
         f = theano.function([x], self.op(x))
diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py
index c4ab92708f5..c8be76c2d72 100644
--- a/pymc3/tests/test_model.py
+++ b/pymc3/tests/test_model.py
@@ -167,13 +167,13 @@ def test_observed_type(self):
 
 class TestTheanoConfig:
     def test_set_testval_raise(self):
-        with theano.configparser.change_flags(compute_test_value="off"):
+        with theano.config.change_flags(compute_test_value="off"):
             with pm.Model():
                 assert theano.config.compute_test_value == "raise"
             assert theano.config.compute_test_value == "off"
 
     def test_nested(self):
-        with theano.configparser.change_flags(compute_test_value="off"):
+        with theano.config.change_flags(compute_test_value="off"):
             with pm.Model(theano_config={"compute_test_value": "ignore"}):
                 assert theano.config.compute_test_value == "ignore"
                 with pm.Model(theano_config={"compute_test_value": "warn"}):
diff --git a/pymc3/tests/test_model_helpers.py b/pymc3/tests/test_model_helpers.py
index 62312c4428a..7b049bac707 100644
--- a/pymc3/tests/test_model_helpers.py
+++ b/pymc3/tests/test_model_helpers.py
@@ -16,6 +16,7 @@
 import numpy.ma as ma
 import numpy.testing as npt
 import pandas as pd
+import pytest
 import scipy.sparse as sps
 import theano
 import theano.sparse as sparse
@@ -25,18 +26,18 @@
 
 
 class TestHelperFunc:
-    def test_pandas_to_array(self):
+    @pytest.mark.parametrize("input_dtype", ["int32", "int64", "float32", "float64"])
+    def test_pandas_to_array(self, input_dtype):
         """
         Ensure that pandas_to_array returns the dense array, masked array,
         graph variable, TensorVariable, or sparse matrix as appropriate.
         """
         # Create the various inputs to the function
-        sparse_input = sps.csr_matrix(np.eye(3))
-        dense_input = np.arange(9).reshape((3, 3))
+        sparse_input = sps.csr_matrix(np.eye(3)).astype(input_dtype)
+        dense_input = np.arange(9).reshape((3, 3)).astype(input_dtype)
 
         input_name = "input_variable"
         theano_graph_input = tt.as_tensor(dense_input, name=input_name)
-
         pandas_input = pd.DataFrame(dense_input)
 
         # All the even numbers are replaced with NaN
@@ -79,8 +80,20 @@ def test_pandas_to_array(self):
 
         # Check function behavior with Theano graph variable
         theano_output = func(theano_graph_input)
-        assert isinstance(theano_output, theano.gof.graph.Variable)
-        assert theano_output.owner.inputs[0].name == input_name
+        assert isinstance(theano_output, theano.graph.basic.Variable)
+        npt.assert_allclose(theano_output.eval(), theano_graph_input.eval())
+        intX = pm.theanof._conversion_map[theano.config.floatX]
+        if dense_input.dtype == intX or dense_input.dtype == theano.config.floatX:
+            assert theano_output.owner is None  # func should not have added new nodes
+            assert theano_output.name == input_name
+        else:
+            assert theano_output.owner is not None  # func should have casted
+            assert theano_output.owner.inputs[0].name == input_name
+
+        if "float" in input_dtype:
+            assert theano_output.dtype == theano.config.floatX
+        else:
+            assert theano_output.dtype == intX
 
         # Check function behavior with generator data
         generator_output = func(square_generator)
diff --git a/pymc3/tests/test_parallel_sampling.py b/pymc3/tests/test_parallel_sampling.py
index 4d5403a9401..e458c609a8b 100644
--- a/pymc3/tests/test_parallel_sampling.py
+++ b/pymc3/tests/test_parallel_sampling.py
@@ -19,6 +19,8 @@
 
 import theano
 import theano.tensor as tt
 
+from theano.compile.ops import as_op
+
 import pymc3 as pm
 import pymc3.parallel_sampling as ps
@@ -61,7 +63,7 @@ def test_bad_unpickle():
 tt_vector = tt.TensorType(theano.config.floatX, [False])
 
 
-@theano.as_op([tt_vector, tt.iscalar], [tt_vector])
+@as_op([tt_vector, tt.iscalar], [tt_vector])
 def _crash_remote_process(a, master_pid):
     if os.getpid() != master_pid:
         os.exit(0)
diff --git a/pymc3/tests/test_random.py b/pymc3/tests/test_random.py
index 5aee45fb2b5..7a4ae42ce22 100644
--- a/pymc3/tests/test_random.py
+++ b/pymc3/tests/test_random.py
@@ -114,9 +114,9 @@ def test_dep_vars(self):
             ]
         )
 
-    def test_gof_constant(self):
+    def test_graph_constant(self):
         # Issue 3595 pointed out that slice(None) can introduce
-        # theano.gof.graph.Constant into the compute graph, which wasn't
+        # theano.graph.basic.Constant into the compute graph, which wasn't
         # handled correctly by draw_values
         n_d = 500
         n_x = 2
diff --git a/pymc3/tests/test_step.py b/pymc3/tests/test_step.py
index d4b59ec6313..6da70f2a7ab 100644
--- a/pymc3/tests/test_step.py
+++ b/pymc3/tests/test_step.py
@@ -26,6 +26,8 @@
 import theano.tensor as tt
 
 from numpy.testing import assert_array_almost_equal
+from theano.compile.ops import as_op
+from theano.graph.op import Op
 
 from pymc3.data import Data
 from pymc3.distributions import (
@@ -719,7 +721,7 @@ def test_normal_nograd_op(self):
         itypes = [tt.dscalar] if is_64 else [tt.fscalar]
         otypes = [tt.dscalar] if is_64 else [tt.fscalar]
 
-        @theano.as_op(itypes, otypes)
+        @as_op(itypes, otypes)
         def kill_grad(x):
             return x
 
@@ -1456,7 +1458,7 @@ def test_aem_mu_sigma(self):
         np.fill_diagonal(s, sigma ** 2)
 
         # forward model Op - here, just the regression equation
-        class ForwardModel(tt.Op):
+        class ForwardModel(Op):
             if theano.config.floatX == "float32":
                 itypes = [tt.fvector]
                 otypes = [tt.fvector]
@@ -1598,7 +1600,7 @@ def test_variance_reduction(self):
         nchains = 1
 
         # define likelihoods with different Q
-        class Likelihood1(tt.Op):
+        class Likelihood1(Op):
             if theano.config.floatX == "float32":
                 itypes = [tt.fvector]
                 otypes = [tt.fscalar]
@@ -1621,7 +1623,7 @@ def perform(self, node, inputs, outputs):
                     -(0.5 / s ** 2) * np.sum((temp - self.y) ** 2, dtype=p), dtype=p
                 )
 
-        class Likelihood2(tt.Op):
+        class Likelihood2(Op):
             if theano.config.floatX == "float32":
                 itypes = [tt.fvector]
                 otypes = [tt.fscalar]
diff --git a/pymc3/tests/test_types.py b/pymc3/tests/test_types.py
index 98153fca7c7..bd8eaa42df0 100644
--- a/pymc3/tests/test_types.py
+++ b/pymc3/tests/test_types.py
@@ -21,7 +21,6 @@
 from pymc3.model import Model
 from pymc3.sampling import sample
 from pymc3.step_methods import MLDA, NUTS, HamiltonianMC, Metropolis, Slice
-from pymc3.theanof import change_flags
 
 
 class TestType:
@@ -35,7 +34,7 @@ def teardown_method(self):
         # restore theano config
         theano.config = self.theano_config
 
-    @change_flags({"floatX": "float64", "warn_float64": "ignore"})
+    @theano.config.change_flags({"floatX": "float64", "warn_float64": "ignore"})
     def test_float64(self):
         with Model() as model:
             x = Normal("x", testval=np.array(1.0, dtype="float64"))
@@ -48,7 +47,7 @@ def test_float64(self):
             with model:
                 sample(10, sampler())
 
-    @change_flags({"floatX": "float32", "warn_float64": "warn"})
+    @theano.config.change_flags({"floatX": "float32", "warn_float64": "warn"})
     def test_float32(self):
         with Model() as model:
             x = Normal("x", testval=np.array(1.0, dtype="float32"))
@@ -61,7 +60,7 @@ def test_float32(self):
             with model:
                 sample(10, sampler())
 
-    @change_flags({"floatX": "float64", "warn_float64": "ignore"})
+    @theano.config.change_flags({"floatX": "float64", "warn_float64": "ignore"})
     def test_float64_MLDA(self):
         data = np.random.randn(5)
 
@@ -79,7 +78,7 @@ def test_float64_MLDA(self):
         with model:
             sample(10, MLDA(coarse_models=[coarse_model]))
 
-    @change_flags({"floatX": "float32", "warn_float64": "warn"})
+    @theano.config.change_flags({"floatX": "float32", "warn_float64": "warn"})
     def test_float32_MLDA(self):
         data = np.random.randn(5).astype("float32")
 
diff --git a/pymc3/tests/test_updates.py b/pymc3/tests/test_updates.py
index 371ab354b5d..9d8f644075c 100644
--- a/pymc3/tests/test_updates.py
+++ b/pymc3/tests/test_updates.py
@@ -16,8 +16,6 @@
 import pytest
 import theano
 
-from theano.configparser import change_flags
-
 from pymc3.variational.updates import (
     adadelta,
     adagrad,
@@ -73,7 +71,7 @@
     ids=["scalar", "matrix", "mixed"],
 )
 def test_updates_fast(opt, loss_and_params, kwargs, getter):
-    with change_flags(compute_test_value="ignore"):
+    with theano.config.change_flags(compute_test_value="ignore"):
         loss, param = getter(loss_and_params)
         args = dict()
         args.update(**kwargs)
diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py
index dd5d64275df..1ef9b616290 100644
--- a/pymc3/tests/test_variational_inference.py
+++ b/pymc3/tests/test_variational_inference.py
@@ -505,7 +505,7 @@ def test_elbo():
 
     # Create variational gradient tensor
     mean_field = MeanField(model=model)
-    with pm.theanof.change_flags(compute_test_value="off"):
+    with theano.config.change_flags(compute_test_value="off"):
         elbo = -pm.operators.KL(mean_field)()(10000)
 
     mean_field.shared_params["mu"].set_value(post_mu)
@@ -732,7 +732,6 @@ def fit_kwargs(inference, use_minibatch):
     return _select[(type(inference), key)]
 
 
-@pytest.mark.run("first")
 def test_fit_oo(inference, fit_kwargs, simple_model_data):
     trace = inference.fit(**fit_kwargs).sample(10000)
     mu_post = simple_model_data["mu_post"]
@@ -911,7 +910,6 @@ def binomial_model_inference(binomial_model, inference_spec):
     return inference_spec()
 
 
-@pytest.mark.run(after="test_sample_replacements")
 def test_replacements(binomial_model_inference):
     d = tt.bscalar()
     d.tag.test_value = 1
diff --git a/pymc3/theanof.py b/pymc3/theanof.py
index aeeb64e5b02..c40311da6e8 100644
--- a/pymc3/theanof.py
+++ b/pymc3/theanof.py
@@ -15,10 +15,10 @@
 import numpy as np
 import theano
 
-from theano import change_flags, scalar
+from theano import scalar
 from theano import tensor as tt
-from theano.gof import Op
-from theano.gof.graph import inputs
+from theano.graph.basic import Apply, graph_inputs
+from theano.graph.op import Op
 from theano.sandbox.rng_mrg import MRG_RandomStream as RandomStream
 
 from pymc3.blocking import ArrayOrdering
@@ -57,7 +57,7 @@ def inputvars(a):
     -------
     r: list of tensor variables that are inputs
     """
-    return [v for v in inputs(makeiter(a)) if isinstance(v, tt.TensorVariable)]
+    return [v for v in graph_inputs(makeiter(a)) if isinstance(v, tt.TensorVariable)]
 
 
 def cont_inputs(f):
@@ -163,12 +163,12 @@ def grad_ii(i):
     return theano.scan(grad_ii, sequences=[idx], n_steps=f.shape[0], name="jacobian_diag")[0]
 
 
-@change_flags(compute_test_value="ignore")
+@theano.config.change_flags(compute_test_value="ignore")
 def hessian(f, vars=None):
     return -jacobian(gradient(f, vars), vars)
 
 
-@change_flags(compute_test_value="ignore")
+@theano.config.change_flags(compute_test_value="ignore")
 def hessian_diag1(f, v):
     g = gradient1(f, v)
     idx = tt.arange(g.shape[0], dtype="int32")
@@ -179,7 +179,7 @@ def hess_ii(i):
     return theano.map(hess_ii, idx)[0]
 
 
-@change_flags(compute_test_value="ignore")
+@theano.config.change_flags(compute_test_value="ignore")
 def hessian_diag(f, vars=None):
     if vars is None:
         vars = cont_inputs(f)
@@ -340,7 +340,7 @@ def __init__(self, gen, default=None):
 
     def make_node(self, *inputs):
         gen_var = self.generator.make_variable(self)
-        return theano.Apply(self, [], [gen_var])
+        return Apply(self, [], [gen_var])
 
     def perform(self, node, inputs, output_storage, params=None):
         if self.default is not None:
@@ -348,10 +348,10 @@ def perform(self, node, inputs, output_storage, params=None):
         else:
             output_storage[0][0] = next(self.generator)
 
-    def do_constant_folding(self, node):
+    def do_constant_folding(self, fgraph, node):
         return False
 
-    __call__ = change_flags(compute_test_value="off")(Op.__call__)
+    __call__ = theano.config.change_flags(compute_test_value="off")(Op.__call__)
 
     def set_gen(self, gen):
         if not isinstance(gen, GeneratorAdapter):
diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py
index 3c90a962027..896f7422c3d 100644
--- a/pymc3/variational/approximations.py
+++ b/pymc3/variational/approximations.py
@@ -21,7 +21,6 @@
 
 from pymc3.distributions.dist_math import rho2sigma
 from pymc3.math import batched_diag
-from pymc3.theanof import change_flags
 from pymc3.util import update_start_vals
 from pymc3.variational import flows, opvi
 from pymc3.variational.opvi import Approximation, Group, node_property
@@ -60,7 +59,7 @@ def cov(self):
     def std(self):
         return rho2sigma(self.rho)
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __init_group__(self, group):
         super().__init_group__(group)
         if not self._check_user_params():
@@ -115,7 +114,7 @@ class FullRankGroup(Group):
     short_name = "full_rank"
     alias_names = frozenset(["fr"])
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __init_group__(self, group):
         super().__init_group__(group)
         if not self._check_user_params():
@@ -219,7 +218,7 @@ class EmpiricalGroup(Group):
     __param_spec__ = dict(histogram=("s", "d"))
     short_name = "empirical"
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __init_group__(self, group):
         super().__init_group__(group)
         self._check_trace()
@@ -371,7 +370,7 @@ class NormalizingFlowGroup(Group):
     """
     default_flow = "scale-loc"
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __init_group__(self, group):
         super().__init_group__(group)
         # objects to be resolved
diff --git a/pymc3/variational/flows.py b/pymc3/variational/flows.py
index ddb61d04f00..601c7351fa7 100644
--- a/pymc3/variational/flows.py
+++ b/pymc3/variational/flows.py
@@ -19,7 +19,6 @@
 
 from pymc3.distributions.dist_math import rho2sigma
 from pymc3.memoize import WithMemoization
-from pymc3.theanof import change_flags
 from pymc3.variational import opvi
 from pymc3.variational.opvi import collect_shared_to_list, node_property
 
@@ -206,7 +205,7 @@ def all_params(self):
         return params
 
     @property
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def sum_logdets(self):
         dets = [self.logdet]
         current = self
@@ -223,7 +222,7 @@ def forward(self):
     def logdet(self):
         raise NotImplementedError
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def forward_pass(self, z0):
         ret = theano.clone(self.forward, {self.root.z0: z0})
         try:
@@ -298,7 +297,7 @@ def __call__(self, *args):
 class LinearFlow(AbstractFlow):
     __param_spec__ = dict(u=("d",), w=("d",), b=())
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __init__(self, h, u=None, w=None, b=None, **kwargs):
         self.h = h
         super().__init__(**kwargs)
@@ -408,7 +407,7 @@ def make_uw(self, u, w):
 class ReferencePointFlow(AbstractFlow):
     __param_spec__ = dict(a=(), b=(), z_ref=("d",))
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __init__(self, h, a=None, b=None, z_ref=None, **kwargs):
         super().__init__(**kwargs)
         a = self.add_param(a, "a")
@@ -540,7 +539,7 @@ class ScaleFlow(AbstractFlow):
     __param_spec__ = dict(rho=("d",))
     short_name = "scale"
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __init__(self, rho=None, **kwargs):
         super().__init__(**kwargs)
         rho = self.add_param(rho, "rho")
@@ -565,7 +564,7 @@ class HouseholderFlow(AbstractFlow):
     __param_spec__ = dict(v=("d",))
     short_name = "hh"
 
-    @change_flags(compute_test_value="raise")
+    @theano.config.change_flags(compute_test_value="raise")
     def __init__(self, v=None, **kwargs):
         super().__init__(**kwargs)
         v = self.add_param(v, "v")
diff --git a/pymc3/variational/operators.py b/pymc3/variational/operators.py
index 5a215576719..9a5c2fdc200 100644
--- a/pymc3/variational/operators.py
+++ b/pymc3/variational/operators.py
@@ -11,12 +11,12 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import theano
 
 from theano import tensor as tt
 
 import pymc3 as pm
 
-from pymc3.theanof import change_flags
 from pymc3.variational import opvi
 from pymc3.variational.opvi import ObjectiveFunction, Operator
 from pymc3.variational.stein import Stein
@@ -75,7 +75,7 @@ def __init__(self, op, tf):
             raise opvi.ParametrizationError("Op should be KSD")
         ObjectiveFunction.__init__(self, op, tf)
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __call__(self, nmc, **kwargs):
         op = self.op  # type: KSD
         grad = op.apply(self.tf)
diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py
index b12b56110c6..ebf4a9cda84 100644
--- a/pymc3/variational/opvi.py
+++ b/pymc3/variational/opvi.py
@@ -59,7 +59,7 @@
 from pymc3.blocking import ArrayOrdering, DictToArrayBijection, VarMap
 from pymc3.memoize import WithMemoization, memoize
 from pymc3.model import modelcontext
-from pymc3.theanof import change_flags, identity, tt_rng
+from pymc3.theanof import identity, tt_rng
 from pymc3.util import get_default_varnames, get_transformed
 from pymc3.variational.updates import adagrad_window
 
@@ -115,15 +115,20 @@ def node_property(f):
 
         def wrapper(fn):
             return property(
-                memoize(change_flags(compute_test_value="off")(append_name(f)(fn)), bound=True)
+                memoize(
+                    theano.config.change_flags(compute_test_value="off")(append_name(f)(fn)),
+                    bound=True,
+                )
             )
 
         return wrapper
     else:
-        return property(memoize(change_flags(compute_test_value="off")(f), bound=True))
+        return property(
+            memoize(theano.config.change_flags(compute_test_value="off")(f), bound=True)
+        )
 
 
-@change_flags(compute_test_value="ignore")
+@theano.config.change_flags(compute_test_value="ignore")
 def try_to_set_test_value(node_in, node_out, s):
     _s = s
     if s is None:
@@ -286,7 +291,7 @@ def add_obj_updates(
         if self.op.returns_loss:
             updates.loss = obj_target
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def step_function(
         self,
         obj_n_mc=None,
@@ -359,7 +364,7 @@ def step_function(
         step_fn = theano.function([], None, updates=updates, **fn_kwargs)
         return step_fn
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def score_function(
         self, sc_n_mc=None, more_replacements=None, fn_kwargs=None
     ):  # pragma: no cover
@@ -387,7 +392,7 @@ def score_function(
         loss = self(sc_n_mc, more_replacements=more_replacements)
         return theano.function([], loss, **fn_kwargs)
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __call__(self, nmc, **kwargs):
         if "more_tf_params" in kwargs:
             m = -1.0
@@ -925,7 +930,7 @@ def _input_type(self, name):
         else:
             return tt.vector(name)
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def __init_group__(self, group):
         if not group:
             raise GroupError("Got empty group")
@@ -1106,7 +1111,7 @@ def symbolic_random2d(self):
         else:
             return self.symbolic_random
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def set_size_and_deterministic(self, node, s, d, more_replacements=None):
         """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior`
         or :func:`symbolic_single_sample` new random generator can be allocated and applied to node
@@ -1467,7 +1472,7 @@ def make_size_and_deterministic_replacements(self, s, d, more_replacements=None)
         flat2rand.update(more_replacements)
         return flat2rand
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def set_size_and_deterministic(self, node, s, d, more_replacements=None):
         """*Dev* - after node is sampled via :func:`symbolic_sample_over_posterior`
         or :func:`symbolic_single_sample` new random generator can be allocated and applied to node
@@ -1532,7 +1537,7 @@ def get_optimization_replacements(self, s, d):
             repl[self.datalogp] = self.single_symbolic_datalogp
         return repl
 
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def sample_node(self, node, size=None, deterministic=False, more_replacements=None):
         """Samples given node or nodes over shared posterior
 
@@ -1583,7 +1588,7 @@ def vars_names(vs):
 
     @property
     @memoize(bound=True)
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def sample_dict_fn(self):
         s = tt.iscalar()
         names = [v.name for v in self.model.free_RVs]
diff --git a/pymc3/variational/stein.py b/pymc3/variational/stein.py
index cf836be734e..ca9a9249106 100644
--- a/pymc3/variational/stein.py
+++ b/pymc3/variational/stein.py
@@ -16,7 +16,7 @@
 import theano.tensor as tt
 
 from pymc3.memoize import WithMemoization, memoize
-from pymc3.theanof import change_flags, floatX
+from pymc3.theanof import floatX
 from pymc3.variational.opvi import node_property
 from pymc3.variational.test_functions import rbf
 
@@ -91,6 +91,6 @@ def logp_norm(self):
         return sized_symbolic_logp / self.approx.symbolic_normalizing_constant
 
     @memoize
-    @change_flags(compute_test_value="off")
+    @theano.config.change_flags(compute_test_value="off")
     def _kernel(self):
         return self._kernel_f(self.input_joint_matrix)
diff --git a/pymc3/vartypes.py b/pymc3/vartypes.py
index 9ddd88c21a4..2469036f312 100644
--- a/pymc3/vartypes.py
+++ b/pymc3/vartypes.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from theano.gof.graph import Constant as graph_constant
+from theano.graph.basic import Constant as graph_constant
 from theano.tensor import Constant as tensor_constant
 
 __all__ = [
diff --git a/requirements.txt b/requirements.txt
index e10227e8173..54fccd1a8bd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
-arviz>=0.9.0
+arviz>=0.11.0
 dill
 fastprogress>=0.2.0
 numpy>=1.15.0
 pandas>=0.24.0
 patsy>=0.5.1
 scipy>=1.2.0
-theano-pymc==1.0.14
+theano-pymc==1.1.0
 typing-extensions>=3.7.4
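
The renames in this patch follow one mechanical mapping: `theano.gof` is split
into `theano.graph.op` (`Op`, `get_test_value`), `theano.graph.basic` (`Apply`,
`Constant`, `ancestors`, `graph_inputs`, `walk`), `theano.graph.fg`
(`FunctionGraph`, `MissingInputError`), and `theano.graph.utils`
(`TestValueError`), while `theano.configparser.change_flags` and the
`pymc3.theanof.change_flags` re-export both become `theano.config.change_flags`.
A minimal sketch of a custom Op written against the new Theano-PyMC 1.1.0
layout; `SquareOp` is a hypothetical example for illustration, not part of this
changeset::

    import numpy as np
    import theano
    import theano.tensor as tt

    from theano.graph.basic import Apply  # was: from theano.gof import Apply
    from theano.graph.op import Op  # was: from theano.gof import Op


    class SquareOp(Op):
        """Hypothetical elementwise-square Op on the 1.1.0 import paths."""

        def make_node(self, x):
            x = tt.as_tensor_variable(x)
            # Apply now lives in theano.graph.basic rather than theano.gof
            return Apply(self, [x], [x.type()])

        def perform(self, node, inputs, output_storage):
            (x,) = inputs
            output_storage[0][0] = np.square(x)


    # change_flags moved from theano.configparser onto the config object itself
    with theano.config.change_flags(compute_test_value="off"):
        y = SquareOp()(tt.dvector("x"))

Because the old `theano.gof` namespace no longer resolves under the pinned
release, the call sites and the `requirements.txt` bump move together in this
diff.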