From 4a9aee3f8220c3dc1b4c1134abe5f7bf640002a5 Mon Sep 17 00:00:00 2001 From: "Brandon T. Willard" Date: Wed, 3 Feb 2021 19:34:58 -0600 Subject: [PATCH] Remove newly deprecated classes and functions Classes and functions removed: - PyMC3Variable - ObservedRV - FreeRV - MultiObservedRV - TransformedRV - ArrayOrdering - VarMap - DataMap - _DrawValuesContext - _DrawValuesContextBlocker - is_fast_drawable - _compile_theano_function - vectorize_theano_function - get_vectorize_signature - _draw_value - draw_values - generate_samples - fast_sample_posterior_predictive Modules removed: - pymc3.distributions.posterior_predictive - pymc3.tests.test_random --- docs/source/api/distributions/utilities.rst | 6 - docs/source/api/variables.rst | 17 - docs/source/developer_guide.rst | 332 +++------ pymc3/blocking.py | 33 +- pymc3/data.py | 19 +- pymc3/distributions/__init__.py | 35 +- pymc3/distributions/bound.py | 73 +- pymc3/distributions/continuous.py | 200 +++--- pymc3/distributions/discrete.py | 96 +-- pymc3/distributions/dist_math.py | 3 +- pymc3/distributions/distribution.py | 686 +----------------- pymc3/distributions/mixture.py | 556 ++++++++------- pymc3/distributions/multivariate.py | 298 ++++---- pymc3/distributions/posterior_predictive.py | 698 ------------------- pymc3/distributions/simulator.py | 14 +- pymc3/distributions/timeseries.py | 31 +- pymc3/distributions/transforms.py | 4 +- pymc3/gp/gp.py | 7 +- pymc3/model.py | 293 +------- pymc3/model_graph.py | 15 +- pymc3/sampling.py | 7 +- pymc3/step_methods/arraystep.py | 5 +- pymc3/step_methods/elliptical_slice.py | 4 +- pymc3/step_methods/gibbs.py | 19 +- pymc3/step_methods/hmc/quadpotential.py | 10 +- pymc3/step_methods/metropolis.py | 16 +- pymc3/step_methods/sgmcmc.py | 17 +- pymc3/tests/test_data_container.py | 15 - pymc3/tests/test_distributions_random.py | 131 +--- pymc3/tests/test_distributions_timeseries.py | 11 +- pymc3/tests/test_model.py | 2 +- pymc3/tests/test_ndarray_backend.py | 4 - pymc3/tests/test_random.py | 187 ----- pymc3/tests/test_sampling.py | 111 --- pymc3/tests/test_shared.py | 4 - pymc3/tests/test_variational_inference.py | 3 +- pymc3/util.py | 2 +- pymc3/variational/approximations.py | 2 +- pymc3/variational/inference.py | 6 +- pymc3/variational/opvi.py | 38 +- 40 files changed, 845 insertions(+), 3165 deletions(-) delete mode 100644 pymc3/distributions/posterior_predictive.py delete mode 100644 pymc3/tests/test_random.py diff --git a/docs/source/api/distributions/utilities.rst b/docs/source/api/distributions/utilities.rst index 6532a1c234b..0ccceafe2a6 100644 --- a/docs/source/api/distributions/utilities.rst +++ b/docs/source/api/distributions/utilities.rst @@ -12,9 +12,6 @@ Distribution utility classes and functions DensityDist TensorType - draw_values - generate_samples - .. autoclass:: Distribution .. autoclass:: Discrete @@ -23,6 +20,3 @@ Distribution utility classes and functions .. autoclass:: DensityDist :members: .. autofunction:: TensorType - -.. autofunction:: draw_values -.. autofunction:: generate_samples diff --git a/docs/source/api/variables.rst b/docs/source/api/variables.rst index 46fd503ab51..b2c687cf56c 100644 --- a/docs/source/api/variables.rst +++ b/docs/source/api/variables.rst @@ -6,22 +6,5 @@ Random Variables The normal PyMC3 programmer will typically not need to interact with these classes, except possibly when debugging. Otherwise they are primarily of interest to developers. -.. autoclass:: PyMC3Variable - :members: - - .. autoclass:: ValueGradFunction :members: - - -.. 
autoclass:: FreeRV - :members: - -.. autoclass:: ObservedRV - :members: - -.. autoclass:: MultiObservedRV - :members: - -.. autoclass:: TransformedRV - :members: diff --git a/docs/source/developer_guide.rst b/docs/source/developer_guide.rst index 64463cd5b41..ec89f709df2 100644 --- a/docs/source/developer_guide.rst +++ b/docs/source/developer_guide.rst @@ -156,8 +156,8 @@ explicit about the conversion. For example: .. code:: python with pm.Model() as model: - z = pm.Normal('z', mu=0., sigma=5.) # ==> pymc3.model.FreeRV, or theano.tensor with logp - x = pm.Normal('x', mu=z, sigma=1., observed=5.) # ==> pymc3.model.ObservedRV, also has logp properties + z = pm.Normal('z', mu=0., sigma=5.) # ==> theano.tensor.var.TensorVariable + x = pm.Normal('x', mu=z, sigma=1., observed=5.) # ==> theano.tensor.var.TensorVariable x.logp({'z': 2.5}) # ==> -4.0439386 model.logp({'z': 2.5}) # ==> -6.6973152 @@ -190,12 +190,11 @@ explicit about the conversion. For example: model_logp # ==> -6.6973152 -Random method and logp method, very different behind the curtain +``logp`` method, very different behind the curtain ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In short, the random method is scipy/numpy-based, and the logp method is -Theano-based. The ``logp`` method is straightforward - it is a Theano -function within each distribution. It has the following signature: +The ``logp`` method is straightforward - it is a Theano function within each +distribution. It has the following signature: .. code:: python @@ -229,43 +228,13 @@ itself parameters, type is numpy arrays - dist_shape=self.shape, - size=size) - return samples - -Here, ``point`` is a dictionary that contains dependence of -``param1, param2, ...``, and ``draw_values`` generates a (random) -``(size, ) + param.shape`` arrays *conditioned* on the information from -``point``. This is the backbone for forwarding random simulation. The -``draw_values`` function is a recursive algorithm to try to resolve all -the dependence outside of Theano, by walking the Theano computational -graph, it is complicated and a constant pain point for bug fixing: -https://github.com/pymc-devs/pymc3/blob/master/pymc3/distributions/distribution.py#L217-L529 -(But also see a `recent -PR `__ that use -interception and context manager to resolve the dependence issue) - Model context and Random Variable --------------------------------- @@ -323,164 +292,112 @@ a model: x = pm.Normal('x', mu=0., sigma=1.) -Which is the same as doing: - - -.. code:: python - - m = pm.Model() - x = m.Var('x', pm.Normal.dist(mu=0., sigma=1.)) - - -Both with the same output: - - .. parsed-literal:: - print(type(x)) # ==> + print(type(x)) # ==> print(m.free_RVs) # ==> [x] - print(x.distribution.logp(5.)) # ==> Elemwise{switch,no_inplace}.0 - print(x.distribution.logp(5.).eval({})) # ==> -13.418938533204672 + print(logpt(x, 5.0)) # ==> Elemwise{switch,no_inplace}.0 + print(logpt(x, 5.).eval({})) # ==> -13.418938533204672 print(m.logp({'x': 5.})) # ==> -13.418938533204672 +In general, if a variable has observations (``observed`` parameter), the RV is +an observed RV, otherwise if it has a ``transformed`` (``transform`` parameter) +attribute, it is a transformed RV otherwise, it will be the most elementary +form: a free RV. Note that this means that random variables with observations +cannot be transformed. 
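To make the distinction concrete, here is a minimal sketch that restates the example above; it only uses attributes already shown in this guide (``m.free_RVs``, ``m.observed_RVs``, ``m.logp``), and the printed values simply repeat the numbers quoted earlier for the same model:

.. code:: python

    import pymc3 as pm

    with pm.Model() as m:
        z = pm.Normal('z', mu=0., sigma=5.)               # no observed/transform ==> free RV
        x = pm.Normal('x', mu=z, sigma=1., observed=5.)   # observed ==> observed RV

    print(m.free_RVs)          # ==> [z]
    print(m.observed_RVs)      # ==> [x]
    print(m.logp({'z': 2.5}))  # ==> -6.6973152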
-Looking closer to the classmethod ``model.Var``, it is clear that what -PyMC3 does is an **interception** of the Random Variable, depending on -the ``*args``: -https://github.com/pymc-devs/pymc3/blob/6d07591962a6c135640a3c31903eba66b34e71d8/pymc3/model.py#L786-L847 +.. + Below, I will take a deeper look into transformed RV. A normal user + might not necessarily come in contact with the concept, since a + transformed RV and ``TransformedDistribution`` are intentionally not + user facing. -.. code:: python + Because in PyMC3 there is no bijector class like in TFP or pyro, we only + have a partial implementation called ``Transform``, which implements + Jacobian correction for forward mapping only (there is no Jacobian + correction for inverse mapping). The use cases we considered are limited + to the set of distributions that are bounded, and the transformation + maps the bounded set to the real line - see + `doc + `__. + However, other transformations are possible. + In general, PyMC3 does not provide explicit functionality to transform + one distribution to another. Instead, a dedicated distribution is + usually created in order to optimise performance. But getting a + ``TransformedDistribution`` is also possible (see also in + `doc `__): - def Var(self, name, dist, data=None, total_size=None): - """ - ... - """ - ... - if data is None: - if getattr(dist, "transform", None) is None: - with self: - var = FreeRV(...) # ==> FreeRV - self.free_RVs.append(var) - else: - with self: - var = TransformedRV(...) # ==> TransformedRV - ... - self.deterministics.append(var) - self.add_random_variable(var) - return var - elif isinstance(data, dict): - with self: - var = MultiObservedRV(...) # ==> MultiObservedRV - self.observed_RVs.append(var) - if var.missing_values: - ... # ==> Additional FreeRV if there is missing values - else: - with self: - var = ObservedRV(...) # ==> ObservedRV - self.observed_RVs.append(var) - if var.missing_values: - ... # ==> Additional FreeRV if there is missing values - - self.add_random_variable(var) - return var - -In general, if a variable has observations (``observed`` parameter), the RV is defined as an ``ObservedRV``, -otherwise if it has a ``transformed`` (``transform`` parameter) attribute, it is a -``TransformedRV``, otherwise, it will be the most elementary form: a -``FreeRV``. Note that this means that random variables with -observations cannot be transformed. - -Below, I will take a deeper look into ``TransformedRV``. A normal user -might not necessary come in contact with the concept, as -``TransformedRV`` and ``TransformedDistribution`` are intentionally not -user facing. - -Because in PyMC3 there is no bijector class like in TFP or pyro, we only -have a partial implementation called ``Transform``, which implements -Jacobian correction for forward mapping only (there is no Jacobian -correction for inverse mapping). The use cases we considered are limited -to the set of distributions that are bounded, and the transformation -maps the bounded set to the real line - see -`doc -`__. -However, other transformations are possible. -In general, PyMC3 does not provide explicit functionality to transform -one distribution to another. Instead, a dedicated distribution is -usually created in order to optimise performance. But getting a -``TransformedDistribution`` is also possible (see also in -`doc `__): - -.. code:: python + .. 
code:: python - tr = pm.distributions.transforms - class Exp(tr.ElemwiseTransform): - name = "exp" - def backward(self, x): - return tt.log(x) - def forward(self, x): - return tt.exp(x) - def jacobian_det(self, x): - return -tt.log(x) + tr = pm.distributions.transforms + class Exp(tr.ElemwiseTransform): + name = "exp" + def backward(self, x): + return tt.log(x) + def forward(self, x): + return tt.exp(x) + def jacobian_det(self, x): + return -tt.log(x) - lognorm = Exp().apply(pm.Normal.dist(0., 1.)) - lognorm + lognorm = Exp().apply(pm.Normal.dist(0., 1.)) + lognorm -.. parsed-literal:: + .. parsed-literal:: - + -Now, back to ``model.RV(...)`` - things returned from ``model.RV(...)`` -are Theano tensor variables, and it is clear from looking at -``TransformedRV``: + Now, back to ``model.RV(...)`` - things returned from ``model.RV(...)`` + are Theano tensor variables, and it is clear from looking at + ``TransformedRV``: -.. code:: python + .. code:: python - class TransformedRV(TensorVariable): - ... + class TransformedRV(TensorVariable): + ... -as for ``FreeRV`` and ``ObservedRV``, they are ``TensorVariable``\s with -``Factor`` as mixin: + as for ``FreeRV`` and ``ObservedRV``, they are ``TensorVariable``\s with + ``Factor`` as mixin: -.. code:: python + .. code:: python - class FreeRV(Factor, TensorVariable): - ... + class FreeRV(Factor, TensorVariable): + ... -``Factor`` basically `enable and assign the -logp `__ -(representated as a tensor also) property to a Theano tensor (thus -making it a random variable). For a ``TransformedRV``, it transforms the -distribution into a ``TransformedDistribution``, and then ``model.Var`` is -called again to added the RV associated with the -``TransformedDistribution`` as a ``FreeRV``: + ``Factor`` basically `enable and assign the + logp `__ + (representated as a tensor also) property to a Theano tensor (thus + making it a random variable). For a ``TransformedRV``, it transforms the + distribution into a ``TransformedDistribution``, and then ``model.Var`` is + called again to added the RV associated with the + ``TransformedDistribution`` as a ``FreeRV``: -.. code:: python + .. code:: python - ... - self.transformed = model.Var( - transformed_name, transform.apply(distribution), total_size=total_size) + ... + self.transformed = model.Var( + transformed_name, transform.apply(distribution), total_size=total_size) -note: after ``transform.apply(distribution)`` its ``.transform`` -porperty is set to ``None``, thus making sure that the above call will -only add one ``FreeRV``. In another word, you *cannot* do chain -transformation by nested applying multiple transforms to a Distribution -(however, you can use `Chain -transformation `__). + note: after ``transform.apply(distribution)`` its ``.transform`` + porperty is set to ``None``, thus making sure that the above call will + only add one ``FreeRV``. In another word, you *cannot* do chain + transformation by nested applying multiple transforms to a Distribution + (however, you can use `Chain + transformation `__). -.. code:: python + .. code:: python - z = pm.Lognormal.dist(mu=0., sigma=1., transform=tr.Log) - z.transform # ==> pymc3.distributions.transforms.Log + z = pm.Lognormal.dist(mu=0., sigma=1., transform=tr.Log) + z.transform # ==> pymc3.distributions.transforms.Log -.. code:: python + .. code:: python - z2 = Exp().apply(z) - z2.transform is None # ==> True + z2 = Exp().apply(z) + z2.transform is None # ==> True @@ -624,93 +541,6 @@ Theano graph to compile additional Theano functions. 
PyMC3 relies on ``theano.clone`` to copy the ``model.logpt`` and replace its input. It does not edit or rewrite the graph directly. -.. code:: python - - class ValueGradFunction: - """Create a theano function that computes a value and its gradient. - ... - """ - def __init__(self, logpt, grad_vars, extra_vars=[], dtype=None, - casting='no', **kwargs): - ... - - self._grad_vars = grad_vars - self._extra_vars = extra_vars - self._extra_var_names = set(var.name for var in extra_vars) - self._logpt = logpt - self._ordering = ArrayOrdering(grad_vars) - self.size = self._ordering.size - self._extra_are_set = False - - ... - - # Extra vars are a subset of free_RVs that are not input to the compiled function. - # But nonetheless logpt depends on these RVs. - # This is set up as a dict of theano.shared tensors, but givens (a list of - # tuple(free_RVs, theano.shared)) is the actual list that goes into the theano function - givens = [] - self._extra_vars_shared = {} - for var in extra_vars: - shared = theano.shared(var.tag.test_value, var.name + '_shared__') - self._extra_vars_shared[var.name] = shared - givens.append((var, shared)) - - # See the implementation below. Basically, it clones the logpt and replaces its - # input with a *single* 1d theano tensor - self._vars_joined, self._logpt_joined = self._build_joined( - self._logpt, grad_vars, self._ordering.vmap) - - grad = tt.grad(self._logpt_joined, self._vars_joined) - grad.name = '__grad' - - inputs = [self._vars_joined] - - self._theano_function = theano.function( - inputs, [self._logpt_joined, grad], givens=givens, **kwargs) - - - def _build_joined(self, logpt, args, vmap): - args_joined = tt.vector('__args_joined') - args_joined.tag.test_value = np.zeros(self.size, dtype=self.dtype) - - joined_slices = {} - for vmap in vmap: - sliced = args_joined[vmap.slc].reshape(vmap.shp) - sliced.name = vmap.var - joined_slices[vmap.var] = sliced - - replace = {var: joined_slices[var.name] for var in args} - return args_joined, theano.clone(logpt, replace=replace) - - - def __call__(self, array, grad_out=None, extra_vars=None): - ... - logp, dlogp = self._theano_function(array) - return logp, dlogp - - - def set_extra_values(self, extra_vars): - ... - - def get_extra_values(self): - ... - - @property - def profile(self): - ... - - def dict_to_array(self, point): - ... - - def array_to_dict(self, array): - ... - - def array_to_full_dict(self, array): - """Convert an array to a dictionary with grad_vars and extra_vars.""" - ... - - ... - The important parts of the above function is highlighted and commented. On a high level, it allows us to build conditional logp function and its gradient easily. Here is a taste of how it works in action: diff --git a/pymc3/blocking.py b/pymc3/blocking.py index 59750a30c7b..332edceed89 100644 --- a/pymc3/blocking.py +++ b/pymc3/blocking.py @@ -23,42 +23,11 @@ import numpy as np -__all__ = ["ArrayOrdering", "DictToArrayBijection"] +__all__ = ["DictToArrayBijection"] # `point_map_info` is a tuple of tuples containing `(name, shape, dtype)` for # each of the raveled variables. 
RaveledVars = collections.namedtuple("RaveledVars", "data, point_map_info") -VarMap = collections.namedtuple("VarMap", "var, slc, shp, dtyp") -DataMap = collections.namedtuple("DataMap", "list_ind, slc, shp, dtype, name") - - -class ArrayOrdering: - """ - An ordering for an array space - """ - - def __init__(self, vars): - self.vmap = [] - self.by_name = {} - self.size = 0 - - for var in vars: - name = var.name - if name is None: - raise ValueError("Unnamed variable in ArrayOrdering.") - if name in self.by_name: - raise ValueError("Name of variable not unique: %s." % name) - if not hasattr(var, "dshape") or not hasattr(var, "dsize"): - raise ValueError("Shape of variable not known %s" % name) - - slc = slice(self.size, self.size + var.dsize) - varmap = VarMap(name, slc, var.dshape, var.dtype) - self.vmap.append(varmap) - self.by_name[name] = varmap - self.size += var.dsize - - def __getitem__(self, key): - return self.by_name[key] class DictToArrayBijection: diff --git a/pymc3/data.py b/pymc3/data.py index 4cdb793aa33..b50e329a7e0 100644 --- a/pymc3/data.py +++ b/pymc3/data.py @@ -542,15 +542,16 @@ def __new__(self, name, value, *, dims=None, export_index_as_coords=False): # To draw the node for this variable in the graphviz Digraph we need # its shape. - shared_object.dshape = tuple(shared_object.shape.eval()) - if dims is not None: - shape_dims = model.shape_from_dims(dims) - if shared_object.dshape != shape_dims: - raise pm.exceptions.ShapeError( - "Data shape does not match with specified `dims`.", - actual=shared_object.dshape, - expected=shape_dims, - ) + # XXX: This needs to be refactored + # shared_object.dshape = tuple(shared_object.shape.eval()) + # if dims is not None: + # shape_dims = model.shape_from_dims(dims) + # if shared_object.dshape != shape_dims: + # raise pm.exceptions.ShapeError( + # "Data shape does not match with specified `dims`.", + # actual=shared_object.dshape, + # expected=shape_dims, + # ) model.add_random_variable(shared_object, dims=dims) diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index af7bbf713bd..82e52781f11 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -339,8 +339,7 @@ def logpt_sum(rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, return tt.sum(logpt(rv_var, rv_value, **kwargs)) -# from pymc3.distributions import timeseries -from pymc3.distributions import shape_utils, transforms +from pymc3.distributions import shape_utils, timeseries, transforms from pymc3.distributions.bart import BART from pymc3.distributions.bound import Bound from pymc3.distributions.continuous import ( @@ -402,8 +401,6 @@ def logpt_sum(rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, Discrete, Distribution, NoDistribution, - draw_values, - generate_samples, ) from pymc3.distributions.mixture import Mixture, MixtureSameFamily, NormalMixture from pymc3.distributions.multivariate import ( @@ -419,17 +416,16 @@ def logpt_sum(rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, Wishart, WishartBartlett, ) -from pymc3.distributions.posterior_predictive import fast_sample_posterior_predictive from pymc3.distributions.simulator import Simulator +from pymc3.distributions.timeseries import ( + AR, + AR1, + GARCH11, + GaussianRandomWalk, + MvGaussianRandomWalk, + MvStudentTRandomWalk, +) -# from pymc3.distributions.timeseries import ( -# AR, -# AR1, -# GARCH11, -# GaussianRandomWalk, -# MvGaussianRandomWalk, -# MvStudentTRandomWalk, -# ) __all__ = [ "Uniform", "Flat", 
@@ -487,13 +483,13 @@ def logpt_sum(rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, "WishartBartlett", "LKJCholeskyCov", "LKJCorr", - # "AR1", - # "AR", + "AR1", + "AR", "AsymmetricLaplace", - # "GaussianRandomWalk", - # "MvGaussianRandomWalk", - # "MvStudentTRandomWalk", - # "GARCH11", + "GaussianRandomWalk", + "MvGaussianRandomWalk", + "MvStudentTRandomWalk", + "GARCH11", "SkewNormal", "Mixture", "NormalMixture", @@ -508,6 +504,5 @@ def logpt_sum(rv_var: TensorVariable, rv_value: Optional[TensorVariable] = None, "Rice", "Moyal", "Simulator", - "fast_sample_posterior_predictive", "BART", ] diff --git a/pymc3/distributions/bound.py b/pymc3/distributions/bound.py index 074a575ebad..5fdb6070faa 100644 --- a/pymc3/distributions/bound.py +++ b/pymc3/distributions/bound.py @@ -19,13 +19,7 @@ from pymc3.distributions import transforms from pymc3.distributions.dist_math import bound -from pymc3.distributions.distribution import ( - Continuous, - Discrete, - Distribution, - draw_values, - generate_samples, -) +from pymc3.distributions.distribution import Continuous, Discrete, Distribution from pymc3.theanof import floatX __all__ = ["Bound"] @@ -115,38 +109,39 @@ def random(self, point=None, size=None): ------- array """ - if self.lower is None and self.upper is None: - return self._wrapped.random(point=point, size=size) - elif self.lower is not None and self.upper is not None: - lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - return generate_samples( - self._random, - lower, - upper, - dist_shape=self.shape, - size=size, - not_broadcast_kwargs={"point": point}, - ) - elif self.lower is not None: - lower = draw_values([self.lower], point=point, size=size) - return generate_samples( - self._random, - lower, - np.inf, - dist_shape=self.shape, - size=size, - not_broadcast_kwargs={"point": point}, - ) - else: - upper = draw_values([self.upper], point=point, size=size) - return generate_samples( - self._random, - -np.inf, - upper, - dist_shape=self.shape, - size=size, - not_broadcast_kwargs={"point": point}, - ) + # if self.lower is None and self.upper is None: + # return self._wrapped.random(point=point, size=size) + # elif self.lower is not None and self.upper is not None: + # lower, upper = draw_values([self.lower, self.upper], point=point, size=size) + # return generate_samples( + # self._random, + # lower, + # upper, + # dist_shape=self.shape, + # size=size, + # not_broadcast_kwargs={"point": point}, + # ) + # elif self.lower is not None: + # lower = draw_values([self.lower], point=point, size=size) + # return generate_samples( + # self._random, + # lower, + # np.inf, + # dist_shape=self.shape, + # size=size, + # not_broadcast_kwargs={"point": point}, + # ) + # else: + # upper = draw_values([self.upper], point=point, size=size) + # return generate_samples( + # self._random, + # -np.inf, + # upper, + # dist_shape=self.shape, + # size=size, + # not_broadcast_kwargs={"point": point}, + # ) + pass def _distr_parameters_for_repr(self): return ["lower", "upper"] diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py index 2f5bf78cf04..96514f84f7c 100644 --- a/pymc3/distributions/continuous.py +++ b/pymc3/distributions/continuous.py @@ -24,7 +24,6 @@ from scipy import stats from scipy.interpolate import InterpolatedUnivariateSpline -from scipy.special import expit from theano.tensor.opt import Assert from theano.tensor.random.basic import ( GammaRV, @@ -41,7 +40,6 @@ alltrue_elemwise, betaln, bound, - clipped_beta_rvs, gammaln, 
i0e, incomplete_beta, @@ -51,7 +49,7 @@ normal_lcdf, zvalue, ) -from pymc3.distributions.distribution import Continuous, draw_values, generate_samples +from pymc3.distributions.distribution import Continuous from pymc3.distributions.special import log_i0 from pymc3.math import invlogit, log1mexp, log1pexp, logdiffexp, logit from pymc3.theanof import floatX @@ -661,18 +659,18 @@ def random(self, point=None, size=None): ------- array """ - mu, sigma, lower, upper = draw_values( - [self.mu, self.sigma, self.lower, self.upper], point=point, size=size - ) - return generate_samples( - self._random, - mu=mu, - sigma=sigma, - lower=lower, - upper=upper, - dist_shape=self.shape, - size=size, - ) + # mu, sigma, lower, upper = draw_values( + # [self.mu, self.sigma, self.lower, self.upper], point=point, size=size + # ) + # return generate_samples( + # self._random, + # mu=mu, + # sigma=sigma, + # lower=lower, + # upper=upper, + # dist_shape=self.shape, + # size=size, + # ) def _random(self, mu, sigma, lower, upper, size): """Wrapper around stats.truncnorm.rvs that converts TruncatedNormal's @@ -829,10 +827,10 @@ def random(self, point=None, size=None): ------- array """ - sigma = draw_values([self.sigma], point=point, size=size)[0] - return generate_samples( - stats.halfnorm.rvs, loc=0.0, scale=sigma, dist_shape=self.shape, size=size - ) + # sigma = draw_values([self.sigma], point=point, size=size)[0] + # return generate_samples( + # stats.halfnorm.rvs, loc=0.0, scale=sigma, dist_shape=self.shape, size=size + # ) def logp(self, value): """ @@ -1029,8 +1027,8 @@ def random(self, point=None, size=None): ------- array """ - mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha], point=point, size=size) - return generate_samples(self._random, mu, lam, alpha, dist_shape=self.shape, size=size) + # mu, lam, alpha = draw_values([self.mu, self.lam, self.alpha], point=point, size=size) + # return generate_samples(self._random, mu, lam, alpha, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -1232,8 +1230,8 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) - return generate_samples(clipped_beta_rvs, alpha, beta, dist_shape=self.shape, size=size) + # alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) + # return generate_samples(clipped_beta_rvs, alpha, beta, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -1379,8 +1377,8 @@ def random(self, point=None, size=None): ------- array """ - a, b = draw_values([self.a, self.b], point=point, size=size) - return generate_samples(self._random, a, b, dist_shape=self.shape, size=size) + # a, b = draw_values([self.a, self.b], point=point, size=size) + # return generate_samples(self._random, a, b, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -1469,10 +1467,10 @@ def random(self, point=None, size=None): ------- array """ - lam = draw_values([self.lam], point=point, size=size)[0] - return generate_samples( - np.random.exponential, scale=1.0 / lam, dist_shape=self.shape, size=size - ) + # lam = draw_values([self.lam], point=point, size=size)[0] + # return generate_samples( + # np.random.exponential, scale=1.0 / lam, dist_shape=self.shape, size=size + # ) def logp(self, value): """ @@ -1584,8 +1582,8 @@ def random(self, point=None, size=None): ------- array """ - mu, b = draw_values([self.mu, self.b], point=point, size=size) - return generate_samples(np.random.laplace, mu, b, dist_shape=self.shape, size=size) + # 
mu, b = draw_values([self.mu, self.b], point=point, size=size) + # return generate_samples(np.random.laplace, mu, b, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -1712,8 +1710,8 @@ def random(self, point=None, size=None): ------- array """ - b, kappa, mu = draw_values([self.b, self.kappa, self.mu], point=point, size=size) - return generate_samples(self._random, b, kappa, mu, dist_shape=self.shape, size=size) + # b, kappa, mu = draw_values([self.b, self.kappa, self.mu], point=point, size=size) + # return generate_samples(self._random, b, kappa, mu, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -1840,8 +1838,8 @@ def random(self, point=None, size=None): ------- array """ - mu, tau = draw_values([self.mu, self.tau], point=point, size=size) - return generate_samples(self._random, mu, tau, dist_shape=self.shape, size=size) + # mu, tau = draw_values([self.mu, self.tau], point=point, size=size) + # return generate_samples(self._random, mu, tau, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -1990,10 +1988,10 @@ def random(self, point=None, size=None): ------- array """ - nu, mu, lam = draw_values([self.nu, self.mu, self.lam], point=point, size=size) - return generate_samples( - stats.t.rvs, nu, loc=mu, scale=lam ** -0.5, dist_shape=self.shape, size=size - ) + # nu, mu, lam = draw_values([self.nu, self.mu, self.lam], point=point, size=size) + # return generate_samples( + # stats.t.rvs, nu, loc=mu, scale=lam ** -0.5, dist_shape=self.shape, size=size + # ) def logp(self, value): """ @@ -2146,8 +2144,8 @@ def random(self, point=None, size=None): ------- array """ - alpha, m = draw_values([self.alpha, self.m], point=point, size=size) - return generate_samples(self._random, alpha, m, dist_shape=self.shape, size=size) + # alpha, m = draw_values([self.alpha, self.m], point=point, size=size) + # return generate_samples(self._random, alpha, m, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -2278,8 +2276,8 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) - return generate_samples(self._random, alpha, beta, dist_shape=self.shape, size=size) + # alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) + # return generate_samples(self._random, alpha, beta, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -2390,8 +2388,8 @@ def random(self, point=None, size=None): ------- array """ - beta = draw_values([self.beta], point=point, size=size)[0] - return generate_samples(self._random, beta, dist_shape=self.shape, size=size) + # beta = draw_values([self.beta], point=point, size=size)[0] + # return generate_samples(self._random, beta, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -2695,10 +2693,10 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) - return generate_samples( - stats.invgamma.rvs, a=alpha, scale=beta, dist_shape=self.shape, size=size - ) + # alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) + # return generate_samples( + # stats.invgamma.rvs, a=alpha, scale=beta, dist_shape=self.shape, size=size + # ) def logp(self, value): """ @@ -2874,12 +2872,12 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) - - def _random(a, b, size=None): - return b * (-np.log(np.random.uniform(size=size))) ** (1 / a) - 
- return generate_samples(_random, alpha, beta, dist_shape=self.shape, size=size) + # alpha, beta = draw_values([self.alpha, self.beta], point=point, size=size) + # + # def _random(a, b, size=None): + # return b * (-np.log(np.random.uniform(size=size))) ** (1 / a) + # + # return generate_samples(_random, alpha, beta, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -3023,10 +3021,10 @@ def random(self, point=None, size=None): ------- array """ - nu, sigma = draw_values([self.nu, self.sigma], point=point, size=size) - return np.abs( - generate_samples(stats.t.rvs, nu, loc=0, scale=sigma, dist_shape=self.shape, size=size) - ) + # nu, sigma = draw_values([self.nu, self.sigma], point=point, size=size) + # return np.abs( + # generate_samples(stats.t.rvs, nu, loc=0, scale=sigma, dist_shape=self.shape, size=size) + # ) def logp(self, value): """ @@ -3159,14 +3157,14 @@ def random(self, point=None, size=None): ------- array """ - mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], point=point, size=size) - - def _random(mu, sigma, nu, size=None): - return np.random.normal(mu, sigma, size=size) + np.random.exponential( - scale=nu, size=size - ) - - return generate_samples(_random, mu, sigma, nu, dist_shape=self.shape, size=size) + # mu, sigma, nu = draw_values([self.mu, self.sigma, self.nu], point=point, size=size) + # + # def _random(mu, sigma, nu, size=None): + # return np.random.normal(mu, sigma, size=size) + np.random.exponential( + # scale=nu, size=size + # ) + # + # return generate_samples(_random, mu, sigma, nu, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -3319,10 +3317,10 @@ def random(self, point=None, size=None): ------- array """ - mu, kappa = draw_values([self.mu, self.kappa], point=point, size=size) - return generate_samples( - stats.vonmises.rvs, loc=mu, kappa=kappa, dist_shape=self.shape, size=size - ) + # mu, kappa = draw_values([self.mu, self.kappa], point=point, size=size) + # return generate_samples( + # stats.vonmises.rvs, loc=mu, kappa=kappa, dist_shape=self.shape, size=size + # ) def logp(self, value): """ @@ -3445,12 +3443,12 @@ def random(self, point=None, size=None): ------- array """ - mu, tau, _, alpha = draw_values( - [self.mu, self.tau, self.sigma, self.alpha], point=point, size=size - ) - return generate_samples( - stats.skewnorm.rvs, a=alpha, loc=mu, scale=tau ** -0.5, dist_shape=self.shape, size=size - ) + # mu, tau, _, alpha = draw_values( + # [self.mu, self.tau, self.sigma, self.alpha], point=point, size=size + # ) + # return generate_samples( + # stats.skewnorm.rvs, a=alpha, loc=mu, scale=tau ** -0.5, dist_shape=self.shape, size=size + # ) def logp(self, value): """ @@ -3559,10 +3557,10 @@ def random(self, point=None, size=None): ------- array """ - c, lower, upper = draw_values([self.c, self.lower, self.upper], point=point, size=size) - return generate_samples( - self._random, c=c, lower=lower, upper=upper, size=size, dist_shape=self.shape - ) + # c, lower, upper = draw_values([self.c, self.lower, self.upper], point=point, size=size) + # return generate_samples( + # self._random, c=c, lower=lower, upper=upper, size=size, dist_shape=self.shape + # ) def _random(self, c, lower, upper, size): """Wrapper around stats.triang.rvs that converts Triangular's @@ -3711,10 +3709,10 @@ def random(self, point=None, size=None): ------- array """ - mu, sigma = draw_values([self.mu, self.beta], point=point, size=size) - return generate_samples( - stats.gumbel_r.rvs, loc=mu, scale=sigma, dist_shape=self.shape, size=size - ) + # mu, sigma 
= draw_values([self.mu, self.beta], point=point, size=size) + # return generate_samples( + # stats.gumbel_r.rvs, loc=mu, scale=sigma, dist_shape=self.shape, size=size + # ) def logp(self, value): """ @@ -3883,8 +3881,8 @@ def random(self, point=None, size=None): ------- array """ - nu, sigma = draw_values([self.nu, self.sigma], point=point, size=size) - return generate_samples(self._random, nu=nu, sigma=sigma, dist_shape=self.shape, size=size) + # nu, sigma = draw_values([self.nu, self.sigma], point=point, size=size) + # return generate_samples(self._random, nu=nu, sigma=sigma, dist_shape=self.shape, size=size) def _random(self, nu, sigma, size): """Wrapper around stats.rice.rvs that converts Rice's @@ -3992,11 +3990,11 @@ def random(self, point=None, size=None): ------- array """ - mu, s = draw_values([self.mu, self.s], point=point, size=size) - - return generate_samples( - stats.logistic.rvs, loc=mu, scale=s, dist_shape=self.shape, size=size - ) + # mu, s = draw_values([self.mu, self.s], point=point, size=size) + # + # return generate_samples( + # stats.logistic.rvs, loc=mu, scale=s, dist_shape=self.shape, size=size + # ) def logp(self, value): """ @@ -4118,10 +4116,10 @@ def random(self, point=None, size=None): ------- array """ - mu, _, sigma = draw_values([self.mu, self.tau, self.sigma], point=point, size=size) - return expit( - generate_samples(stats.norm.rvs, loc=mu, scale=sigma, dist_shape=self.shape, size=size) - ) + # mu, _, sigma = draw_values([self.mu, self.tau, self.sigma], point=point, size=size) + # return expit( + # generate_samples(stats.norm.rvs, loc=mu, scale=sigma, dist_shape=self.shape, size=size) + # ) def logp(self, value): """ @@ -4230,7 +4228,7 @@ def random(self, point=None, size=None): ------- array """ - return generate_samples(self._random, dist_shape=self.shape, size=size) + # return generate_samples(self._random, dist_shape=self.shape, size=size) def logp(self, value): """ @@ -4329,10 +4327,10 @@ def random(self, point=None, size=None): ------- array """ - mu, sigma = draw_values([self.mu, self.sigma], point=point, size=size) - return generate_samples( - stats.moyal.rvs, loc=mu, scale=sigma, dist_shape=self.shape, size=size - ) + # mu, sigma = draw_values([self.mu, self.sigma], point=point, size=size) + # return generate_samples( + # stats.moyal.rvs, loc=mu, scale=sigma, dist_shape=self.shape, size=size + # ) def logp(self, value): """ diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index f7fe622baa1..4ec5378d3b6 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -33,8 +33,7 @@ normal_lccdf, normal_lcdf, ) -from pymc3.distributions.distribution import Discrete, draw_values, generate_samples -from pymc3.distributions.shape_utils import broadcast_distribution_samples +from pymc3.distributions.distribution import Discrete from pymc3.math import log1mexp, log1pexp, logaddexp, logit, logsumexp, sigmoid, tround from pymc3.theanof import floatX, intX, take_along_axis @@ -276,10 +275,11 @@ def random(self, point=None, size=None): ------- array """ - alpha, beta, n = draw_values([self.alpha, self.beta, self.n], point=point, size=size) - return generate_samples( - self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size - ) + # alpha, beta, n = draw_values([self.alpha, self.beta, self.n], point=point, size=size) + # return generate_samples( + # self._random, alpha=alpha, beta=beta, n=n, dist_shape=self.shape, size=size + # ) + pass def logp(self, value): r""" @@ -416,8 +416,9 @@ def 
random(self, point=None, size=None): ------- array """ - p = draw_values([self.p], point=point, size=size)[0] - return generate_samples(stats.bernoulli.rvs, p, dist_shape=self.shape, size=size) + # p = draw_values([self.p], point=point, size=size)[0] + # return generate_samples(stats.bernoulli.rvs, p, dist_shape=self.shape, size=size) + pass def logp(self, value): r""" @@ -553,9 +554,9 @@ def random(self, point=None, size=None): ------- array """ - q, beta = draw_values([self.q, self.beta], point=point, size=size) - - return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size) + # q, beta = draw_values([self.q, self.beta], point=point, size=size) + # return generate_samples(self._random, q, beta, dist_shape=self.shape, size=size) + pass def logp(self, value): r""" @@ -674,8 +675,9 @@ def random(self, point=None, size=None): ------- array """ - mu = draw_values([self.mu], point=point, size=size)[0] - return generate_samples(stats.poisson.rvs, mu, dist_shape=self.shape, size=size) + # mu = draw_values([self.mu], point=point, size=size)[0] + # return generate_samples(stats.poisson.rvs, mu, dist_shape=self.shape, size=size) + pass def logp(self, value): r""" @@ -834,10 +836,11 @@ def random(self, point=None, size=None): ------- array """ - mu, alpha = draw_values([self.mu, self.alpha], point=point, size=size) - g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) - g[g == 0] = np.finfo(float).eps # Just in case - return np.asarray(stats.poisson.rvs(g)).reshape(g.shape) + # mu, alpha = draw_values([self.mu, self.alpha], point=point, size=size) + # g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) + # g[g == 0] = np.finfo(float).eps # Just in case + # return np.asarray(stats.poisson.rvs(g)).reshape(g.shape) + pass def _random(self, mu, alpha, size): r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's @@ -974,8 +977,9 @@ def random(self, point=None, size=None): ------- array """ - p = draw_values([self.p], point=point, size=size)[0] - return generate_samples(np.random.geometric, p, dist_shape=self.shape, size=size) + # p = draw_values([self.p], point=point, size=size)[0] + # return generate_samples(np.random.geometric, p, dist_shape=self.shape, size=size) + pass def logp(self, value): r""" @@ -1090,8 +1094,9 @@ def random(self, point=None, size=None): array """ - N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size) - return generate_samples(self._random, N, k, n, dist_shape=self.shape, size=size) + # N, k, n = draw_values([self.N, self.k, self.n], point=point, size=size) + # return generate_samples(self._random, N, k, n, dist_shape=self.shape, size=size) + pass def _random(self, M, n, N, size=None): r"""Wrapper around scipy stat's hypergeom.rvs""" @@ -1242,8 +1247,9 @@ def random(self, point=None, size=None): ------- array """ - lower, upper = draw_values([self.lower, self.upper], point=point, size=size) - return generate_samples(self._random, lower, upper, dist_shape=self.shape, size=size) + # lower, upper = draw_values([self.lower, self.upper], point=point, size=size) + # return generate_samples(self._random, lower, upper, dist_shape=self.shape, size=size) + pass def logp(self, value): r""" @@ -1416,13 +1422,14 @@ def random(self, point=None, size=None): ------- array """ - c = draw_values([self.c], point=point, size=size)[0] - dtype = np.array(c).dtype - - def _random(c, dtype=dtype, size=None): - return np.full(size, fill_value=c, dtype=dtype) - - return 
generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(dtype) + # c = draw_values([self.c], point=point, size=size)[0] + # dtype = np.array(c).dtype + # + # def _random(c, dtype=dtype, size=None): + # return np.full(size, fill_value=c, dtype=dtype) + # + # return generate_samples(_random, c=c, dist_shape=self.shape, size=size).astype(dtype) + pass def logp(self, value): r""" @@ -1519,10 +1526,11 @@ def random(self, point=None, size=None): ------- array """ - theta, psi = draw_values([self.theta, self.psi], point=point, size=size) - g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size) - g, psi = broadcast_distribution_samples([g, psi], size=size) - return g * (np.random.random(g.shape) < psi) + # theta, psi = draw_values([self.theta, self.psi], point=point, size=size) + # g = generate_samples(stats.poisson.rvs, theta, dist_shape=self.shape, size=size) + # g, psi = broadcast_distribution_samples([g, psi], size=size) + # return g * (np.random.random(g.shape) < psi) + pass def logp(self, value): r""" @@ -1650,10 +1658,11 @@ def random(self, point=None, size=None): ------- array """ - n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size) - g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size) - g, psi = broadcast_distribution_samples([g, psi], size=size) - return g * (np.random.random(g.shape) < psi) + # n, p, psi = draw_values([self.n, self.p, self.psi], point=point, size=size) + # g = generate_samples(stats.binom.rvs, n, p, dist_shape=self.shape, size=size) + # g, psi = broadcast_distribution_samples([g, psi], size=size) + # return g * (np.random.random(g.shape) < psi) + pass def logp(self, value): r""" @@ -1804,11 +1813,12 @@ def random(self, point=None, size=None): ------- array """ - mu, alpha, psi = draw_values([self.mu, self.alpha, self.psi], point=point, size=size) - g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) - g[g == 0] = np.finfo(float).eps # Just in case - g, psi = broadcast_distribution_samples([g, psi], size=size) - return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi) + # mu, alpha, psi = draw_values([self.mu, self.alpha, self.psi], point=point, size=size) + # g = generate_samples(self._random, mu=mu, alpha=alpha, dist_shape=self.shape, size=size) + # g[g == 0] = np.finfo(float).eps # Just in case + # g, psi = broadcast_distribution_samples([g, psi], size=size) + # return stats.poisson.rvs(g) * (np.random.random(g.shape) < psi) + pass def _random(self, mu, alpha, size): r"""Wrapper around stats.gamma.rvs that converts NegativeBinomial's diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py index 70877722271..b3cbc18a221 100644 --- a/pymc3/distributions/dist_math.py +++ b/pymc3/distributions/dist_math.py @@ -35,7 +35,6 @@ from pymc3.distributions.shape_utils import to_tuple from pymc3.distributions.special import gammaln -from pymc3.model import modelcontext from pymc3.theanof import floatX f = floatX @@ -73,6 +72,8 @@ def bound(logp, *conditions, **kwargs): # If called inside a model context, see if bounds check is disabled try: + from pymc3.model import modelcontext + model = modelcontext(kwargs.get("model")) if not model.check_bounds: return logp diff --git a/pymc3/distributions/distribution.py b/pymc3/distributions/distribution.py index a77c46fa807..d7ac55a0b9c 100644 --- a/pymc3/distributions/distribution.py +++ b/pymc3/distributions/distribution.py @@ -15,7 +15,6 @@ import contextvars import inspect 
import multiprocessing -import numbers import sys import types import warnings @@ -32,23 +31,7 @@ import theano.graph.basic import theano.tensor as tt -from theano import function - -from pymc3.distributions.shape_utils import ( - broadcast_dist_samples_shape, - get_broadcastable_dist_samples, - to_tuple, -) -from pymc3.memoize import memoize -from pymc3.model import ( - ContextMeta, - FreeRV, - Model, - MultiObservedRV, - ObservedRV, - build_named_node_tree, -) -from pymc3.util import get_repr_for_variable, get_var_name +from pymc3.util import get_repr_for_variable from pymc3.vartypes import string_types, theano_constant __all__ = [ @@ -57,8 +40,6 @@ "Continuous", "Discrete", "NoDistribution", - "draw_values", - "generate_samples", ] vectorized_ppc = contextvars.ContextVar( @@ -80,6 +61,8 @@ class Distribution: def __new__(cls, name, *args, **kwargs): try: + from pymc3.model import Model + model = Model.get_context() except TypeError: raise TypeError( @@ -99,9 +82,6 @@ def __new__(cls, name, *args, **kwargs): data = kwargs.pop("observed", None) - if isinstance(data, ObservedRV) or isinstance(data, FreeRV): - raise TypeError("observed needs to be data but got: {}".format(type(data))) - total_size = kwargs.pop("total_size", None) dims = kwargs.pop("dims", None) @@ -349,39 +329,9 @@ def __init__( testval: number or array (Optional) The ``testval`` of the RV's tensor that follow the ``DensityDist`` distribution. - random: None or callable (Optional) - If ``None``, no random method is attached to the ``DensityDist`` - instance. - If a callable, it is used as the distribution's ``random`` method. - The behavior of this callable can be altered with the - ``wrap_random_with_dist_shape`` parameter. - The supplied callable must have the following signature: - ``random(point=None, size=None, **kwargs)``, where ``point`` is a - ``None`` or a dictionary of random variable names and their - corresponding values (similar to what ``MultiTrace.get_point`` - returns). ``size`` is the number of IID draws to take from the - distribution. Any extra keyword argument can be added as required. - wrap_random_with_dist_shape: bool (Optional) - If ``True``, the provided ``random`` callable is passed through - ``generate_samples`` to make the random number generator aware of - the ``DensityDist`` instance's ``shape``. - If ``False``, it is used exactly as it was provided. - check_shape_in_random: bool (Optional) - If ``True``, the shape of the random samples generate in the - ``random`` method is checked with the expected return shape. This - test is only performed if ``wrap_random_with_dist_shape is False``. args, kwargs: (Optional) These are passed to the parent class' ``__init__``. - Notes - ----- - If the ``random`` method is wrapped with dist shape, what this - means is that the ``random`` callable will be wrapped with the - :func:`~genereate_samples` function. The distribution's shape will - be passed to :func:`~generate_samples` as the ``dist_shape`` - parameter. Any extra ``kwargs`` provided to ``random`` will be - passed as ``not_broadcast_kwargs`` of :func:`~generate_samples`. - Examples -------- .. code-block:: python @@ -393,19 +343,9 @@ def __init__( 'density_dist', normal_dist.logp, observed=np.random.randn(100), - random=normal_dist.random ) trace = pm.sample(100) - If the ``DensityDist`` is multidimensional, some care must be taken - with the supplied ``random`` method. 
By default, the supplied random - is wrapped by :func:`~generate_samples` to make it aware of the - multidimensional distribution's shape. - This can be prevented setting ``wrap_random_with_dist_shape=False``. - Furthermore, the ``size`` parameter is interpreted as the number of - IID draws to take from this multidimensional distribution. - - .. code-block:: python with pm.Model(): @@ -416,77 +356,6 @@ def __init__( normal_dist.logp, observed=np.random.randn(100, 3), shape=3, - random=normal_dist.random, - ) - prior = pm.sample_prior_predictive(10)['density_dist'] - assert prior.shape == (10, 100, 3) - - If ``wrap_random_with_dist_shape=False``, we start to get samples of - an incorrect shape. By default, we can try to catch these situations. - - - .. code-block:: python - - with pm.Model(): - mu = pm.Normal('mu', 0 , 1) - normal_dist = pm.Normal.dist(mu, 1, shape=3) - dens = pm.DensityDist( - 'density_dist', - normal_dist.logp, - observed=np.random.randn(100, 3), - shape=3, - random=normal_dist.random, - wrap_random_with_dist_shape=False, # Is True by default - ) - err = None - try: - prior = pm.sample_prior_predictive(10)['density_dist'] - except RuntimeError as e: - err = e - assert isinstance(err, RuntimeError) - - The default catching can be disabled with the - ``check_shape_in_random`` parameter. - - - .. code-block:: python - - with pm.Model(): - mu = pm.Normal('mu', 0 , 1) - normal_dist = pm.Normal.dist(mu, 1, shape=3) - dens = pm.DensityDist( - 'density_dist', - normal_dist.logp, - observed=np.random.randn(100, 3), - shape=3, - random=normal_dist.random, - wrap_random_with_dist_shape=False, # Is True by default - check_shape_in_random=False, # Is True by default - ) - prior = pm.sample_prior_predictive(10)['density_dist'] - # We get samples with an incorrect shape - assert prior.shape != (10, 100, 3) - - If you use callables that work with ``scipy.stats`` rvs, you must - be aware that their ``size`` parameter is not the number of IID - samples to draw from a distribution, but the desired ``shape`` of - the returned array of samples. It is the user's responsibility to - wrap the callable to make it comply with PyMC3's interpretation - of ``size``. - - - .. 
code-block:: python - - with pm.Model(): - mu = pm.Normal('mu', 0 , 1) - normal_dist = pm.Normal.dist(mu, 1, shape=3) - dens = pm.DensityDist( - 'density_dist', - normal_dist.logp, - observed=np.random.randn(100, 3), - shape=3, - random=stats.norm.rvs, - pymc3_size_interpretation=False, # Is True by default ) prior = pm.sample_prior_predictive(10)['density_dist'] assert prior.shape == (10, 100, 3) @@ -534,554 +403,5 @@ def __setstate__(self, vals): vals["logp"] = dill.loads(vals["logp"]) self.__dict__ = vals - def random(self, point=None, size=None, **kwargs): - if self.rand is not None: - not_broadcast_kwargs = dict(point=point) - not_broadcast_kwargs.update(**kwargs) - if self.wrap_random_with_dist_shape: - size = to_tuple(size) - with _DrawValuesContextBlocker(): - test_draw = generate_samples( - self.rand, - size=None, - not_broadcast_kwargs=not_broadcast_kwargs, - ) - test_shape = test_draw.shape - if self.shape[: len(size)] == size: - dist_shape = size + self.shape - else: - dist_shape = self.shape - broadcast_shape = broadcast_dist_samples_shape([dist_shape, test_shape], size=size) - broadcast_shape = broadcast_shape[: len(broadcast_shape) - len(test_shape)] - samples = generate_samples( - self.rand, - broadcast_shape=broadcast_shape, - size=size, - not_broadcast_kwargs=not_broadcast_kwargs, - ) - else: - samples = self.rand(point=point, size=size, **kwargs) - if self.check_shape_in_random: - expected_shape = self.shape if size is None else to_tuple(size) + self.shape - if not expected_shape == samples.shape: - raise RuntimeError( - "DensityDist encountered a shape inconsistency " - "while drawing samples using the supplied random " - "function. Was expecting to get samples of shape " - "{expected} but got {got} instead.\n" - "Whenever possible wrap_random_with_dist_shape = True " - "is recommended.\n" - "Be aware that the random callable provided as the " - "DensityDist random method cannot " - "adapt to shape changes in the distribution's " - "shape, which sometimes are necessary for sampling " - "when the model uses pymc3.Data or theano shared " - "tensors, or when the DensityDist has observed " - "values.\n" - "This check can be disabled by passing " - "check_shape_in_random=False when the DensityDist " - "is initialized.".format( - expected=expected_shape, - got=samples.shape, - ) - ) - return samples - else: - raise ValueError( - "Distribution was not passed any random method. " - "Define a custom random method and pass it as kwarg random" - ) - def _distr_parameters_for_repr(self): return [] - - -class _DrawValuesContext(metaclass=ContextMeta, context_class="_DrawValuesContext"): - """A context manager class used while drawing values with draw_values""" - - def __new__(cls, *args, **kwargs): - # resolves the parent instance - instance = super().__new__(cls) - instance._parent = cls.get_context(error_if_none=False) - return instance - - def __init__(self): - if self.parent is not None: - # All _DrawValuesContext instances that are in the context of - # another _DrawValuesContext will share the reference to the - # drawn_vars dictionary. This means that separate branches - # in the nested _DrawValuesContext context tree will see the - # same drawn values. 
- # The drawn_vars keys shall be (RV, size) tuples - self.drawn_vars = self.parent.drawn_vars - else: - self.drawn_vars = dict() - - @property - def parent(self): - return self._parent - - -class _DrawValuesContextBlocker(_DrawValuesContext): - """ - Context manager that starts a new drawn variables context disregarding all - parent contexts. This can be used inside a random method to ensure that - the drawn values wont be the ones cached by previous calls - """ - - def __new__(cls, *args, **kwargs): - # resolves the parent instance - instance = super().__new__(cls) - instance._parent = None - return instance - - def __init__(self): - self.drawn_vars = dict() - - -def is_fast_drawable(var): - return isinstance( - var, (numbers.Number, np.ndarray, theano_constant, tt.sharedvar.SharedVariable) - ) - - -def draw_values(params, point=None, size=None): - """ - Draw (fix) parameter values. Handles a number of cases: - - 1) The parameter is a scalar - 2) The parameter is an RV - - a) parameter can be fixed to the value in the point - b) parameter can be fixed by sampling from the RV - c) parameter can be fixed using tag.test_value (last resort) - - 3) The parameter is a tensor variable/constant. Can be evaluated using - theano.function, but a variable may contain nodes which - - a) are named parameters in the point - b) are RVs with a random method - """ - # The following check intercepts and redirects calls to - # draw_values in the context of sample_posterior_predictive - size = to_tuple(size) - ppc_sampler = vectorized_ppc.get(None) - if ppc_sampler is not None: - # this is being done inside new, vectorized sample_posterior_predictive - return ppc_sampler(params, trace=point, samples=size) - - if point is None: - point = {} - # Get fast drawable values (i.e. things in point or numbers, arrays, - # constants or shares, or things that were already drawn in related - # contexts) - with _DrawValuesContext() as context: - params = dict(enumerate(params)) - drawn = context.drawn_vars - evaluated = {} - symbolic_params = [] - for i, p in params.items(): - # If the param is fast drawable, then draw the value immediately - if is_fast_drawable(p): - v = _draw_value(p, point=point, size=size) - evaluated[i] = v - continue - - name = getattr(p, "name", None) - if (p, size) in drawn: - # param was drawn in related contexts - v = drawn[(p, size)] - evaluated[i] = v - # We filter out Deterministics by checking for `model` attribute - elif name is not None and hasattr(p, "model") and name in point: - # param.name is in point - v = point[name] - evaluated[i] = drawn[(p, size)] = v - else: - # param still needs to be drawn - symbolic_params.append((i, p)) - - if not symbolic_params: - # We only need to enforce the correct order if there are symbolic - # params that could be drawn in variable order - return [evaluated[i] for i in params] - - # Distribution parameters may be nodes which have named node-inputs - # specified in the point. Need to find the node-inputs, their - # parents and children to replace them. 
- leaf_nodes, named_nodes_descendents, named_nodes_ancestors = build_named_node_tree( - (param for _, param in symbolic_params if hasattr(param, "name")) - ) - - # Init givens and the stack of nodes to try to `_draw_value` from - givens = { - p.name: (p, v) for (p, size), v in drawn.items() if getattr(p, "name", None) is not None - } - stack = list(leaf_nodes.values()) - while stack: - next_ = stack.pop(0) - if (next_, size) in drawn: - # If the node already has a givens value, skip it - continue - elif isinstance(next_, (theano_constant, tt.sharedvar.SharedVariable)): - # If the node is a theano.tensor.TensorConstant or a - # theano.tensor.sharedvar.SharedVariable, its value will be - # available automatically in _compile_theano_function so - # we can skip it. Furthermore, if this node was treated as a - # TensorVariable that should be compiled by theano in - # _compile_theano_function, it would raise a `TypeError: - # ('Constants not allowed in param list', ...)` for - # TensorConstant, and a `TypeError: Cannot use a shared - # variable (...) as explicit input` for SharedVariable. - # ObservedRV and MultiObservedRV instances are ViewOPs - # of TensorConstants or SharedVariables, we must add them - # to the stack or risk evaluating deterministics with the - # wrong values (issue #3354) - stack.extend( - [ - node - for node in named_nodes_descendents[next_] - if isinstance(node, (ObservedRV, MultiObservedRV)) - and (node, size) not in drawn - ] - ) - continue - else: - # If the node does not have a givens value, try to draw it. - # The named node's children givens values must also be taken - # into account. - children = named_nodes_ancestors[next_] - temp_givens = [givens[k] for k in givens if k in children] - try: - # This may fail for autotransformed RVs, which don't - # have the random method - value = _draw_value(next_, point=point, givens=temp_givens, size=size) - givens[next_.name] = (next_, value) - drawn[(next_, size)] = value - except theano.graph.fg.MissingInputError: - # The node failed, so we must add the node's parents to - # the stack of nodes to try to draw from. We exclude the - # nodes in the `params` list. - stack.extend( - [ - node - for node in named_nodes_descendents[next_] - if node is not None and (node, size) not in drawn - ] - ) - - # the below makes sure the graph is evaluated in order - # test_distributions_random::TestDrawValues::test_draw_order fails without it - # The remaining params that must be drawn are all hashable - to_eval = set() - missing_inputs = {j for j, p in symbolic_params} - while to_eval or missing_inputs: - if to_eval == missing_inputs: - raise ValueError( - "Cannot resolve inputs for {}".format( - [get_var_name(params[j]) for j in to_eval] - ) - ) - to_eval = set(missing_inputs) - missing_inputs = set() - for param_idx in to_eval: - param = params[param_idx] - if (param, size) in drawn: - evaluated[param_idx] = drawn[(param, size)] - else: - try: # might evaluate in a bad order, - # Sometimes _draw_value recurrently calls draw_values. - # This may set values for certain nodes in the drawn - # dictionary, but they don't get added to the givens - # dictionary. Here, we try to fix that. 
- if param in named_nodes_ancestors: - for node in named_nodes_ancestors[param]: - if node.name not in givens and (node, size) in drawn: - givens[node.name] = (node, drawn[(node, size)]) - value = _draw_value(param, point=point, givens=givens.values(), size=size) - evaluated[param_idx] = drawn[(param, size)] = value - givens[param.name] = (param, value) - except theano.graph.fg.MissingInputError: - missing_inputs.add(param_idx) - - return [evaluated[j] for j in params] # set the order back - - -@memoize -def _compile_theano_function(param, vars, givens=None): - """Compile theano function for a given parameter and input variables. - - This function is memoized to avoid repeating costly theano compilations - when repeatedly drawing values, which is done when generating posterior - predictive samples. - - Parameters - ---------- - param: Model variable from which to draw value - vars: Children variables of `param` - givens: Variables to be replaced in the Theano graph - - Returns - ------- - A compiled theano function that takes the values of `vars` as input - positional args - """ - f = function( - vars, - param, - givens=givens, - rebuild_strict=True, - on_unused_input="ignore", - allow_input_downcast=True, - ) - return vectorize_theano_function(f, inputs=vars, output=param) - - -def vectorize_theano_function(f, inputs, output): - """Takes a compiled theano function and wraps it with a vectorized version. - Theano compiled functions expect inputs and outputs of a fixed number of - dimensions. In our context, these usually come from deterministics which - are compiled against a given RV, with its core shape. If we draw i.i.d. - samples from said RV, we would not be able to compute the deterministic - over the i.i.d sampled dimensions (i.e. those that are not the core - dimensions of the RV). To deal with this problem, we wrap the theano - compiled function with numpy.vectorize, providing the correct signature - for the core dimensions. The extra dimensions, will be interpreted as - i.i.d. sampled axis and will be broadcast following the usual rules. - - Parameters - ---------- - f: theano compiled function - inputs: list of theano variables used as inputs for the function - givens: theano variable which is the output of the function - - Notes - ----- - If inputs is an empty list (theano function with no inputs needed), then - the same `f` is returned. - Only functions that return a single theano variable's value can be - vectorized. - - Returns - ------- - A function which wraps `f` with numpy.vectorize with the apropriate call - signature. - """ - inputs_signatures = ",".join( - [ - get_vectorize_signature(var, var_name=f"i_{input_ind}") - for input_ind, var in enumerate(inputs) - ] - ) - if len(inputs_signatures) > 0: - output_signature = get_vectorize_signature(output, var_name="o") - signature = inputs_signatures + "->" + output_signature - - return np.vectorize(f, signature=signature) - else: - return f - - -def get_vectorize_signature(var, var_name="i"): - if var.ndim == 0: - return "()" - else: - sig = ",".join([f"{var_name}_{axis_ind}" for axis_ind in range(var.ndim)]) - return f"({sig})" - - -def _draw_value(param, point=None, givens=None, size=None): - """Draw a random value from a distribution or return a constant. - - Parameters - ---------- - param: number, array like, theano variable or pymc3 random variable - The value or distribution. Constants or shared variables - will be converted to an array and returned. Theano variables - are evaluated. 
If `param` is a pymc3 random variables, draw - a new value from it and return that, unless a value is specified - in `point`. - point: dict, optional - A dictionary from pymc3 variable names to their values. - givens: dict, optional - A dictionary from theano variables to their values. These values - are used to evaluate `param` if it is a theano variable. - size: int, optional - Number of samples - """ - if isinstance(param, (numbers.Number, np.ndarray)): - return param - elif isinstance(param, theano_constant): - return param.value - elif isinstance(param, tt.sharedvar.SharedVariable): - return param.get_value() - elif isinstance(param, (tt.TensorVariable, MultiObservedRV)): - if point and hasattr(param, "model") and param.name in point: - return point[param.name] - elif hasattr(param, "random") and param.random is not None: - return param.random(point=point, size=size) - elif ( - hasattr(param, "distribution") - and hasattr(param.distribution, "random") - and param.distribution.random is not None - ): - if hasattr(param, "observations"): - # shape inspection for ObservedRV - dist_tmp = param.distribution - try: - distshape = param.observations.shape.eval() - except AttributeError: - distshape = param.observations.shape - - dist_tmp.shape = distshape - try: - return dist_tmp.random(point=point, size=size) - except (ValueError, TypeError): - # reset shape to account for shape changes - # with theano.shared inputs - dist_tmp.shape = np.array([]) - # We want to draw values to infer the dist_shape, - # we don't want to store these drawn values to the context - with _DrawValuesContextBlocker(): - val = np.atleast_1d(dist_tmp.random(point=point, size=None)) - # Sometimes point may change the size of val but not the - # distribution's shape - if point and size is not None: - temp_size = np.atleast_1d(size) - if all(val.shape[: len(temp_size)] == temp_size): - dist_tmp.shape = val.shape[len(temp_size) :] - else: - dist_tmp.shape = val.shape - return dist_tmp.random(point=point, size=size) - else: - return param.distribution.random(point=point, size=size) - else: - if givens: - variables, values = list(zip(*givens)) - else: - variables = values = [] - # We only truly care if the ancestors of param that were given - # value have the matching dshape and val.shape - param_ancestors = set(theano.graph.basic.ancestors([param], blockers=list(variables))) - inputs = [(var, val) for var, val in zip(variables, values) if var in param_ancestors] - if inputs: - input_vars, input_vals = list(zip(*inputs)) - else: - input_vars = [] - input_vals = [] - func = _compile_theano_function(param, input_vars) - output = func(*input_vals) - return output - raise ValueError("Unexpected type in draw_value: %s" % type(param)) - - -def generate_samples(generator, *args, **kwargs): - """Generate samples from the distribution of a random variable. - - Parameters - ---------- - generator: function - Function to generate the random samples. The function is - expected take parameters for generating samples and - a keyword argument ``size`` which determines the shape - of the samples. - The args and kwargs (stripped of the keywords below) will be - passed to the generator function. - - keyword arguments - ~~~~~~~~~~~~~~~~~ - - dist_shape: int or tuple of int - The shape of the random variable (i.e., the shape attribute). - size: int or tuple of int - The required shape of the samples. - broadcast_shape: tuple of int or None - The shape resulting from the broadcasting of the parameters. 
- If not specified it will be inferred from the shape of the - parameters. This may be required when the parameter shape - does not determine the shape of a single sample, for example, - the shape of the probabilities in the Categorical distribution. - not_broadcast_kwargs: dict or None - Key word argument dictionary to provide to the random generator, which - must not be broadcasted with the rest of the args and kwargs. - - Any remaining args and kwargs are passed on to the generator function. - """ - dist_shape = kwargs.pop("dist_shape", ()) - size = kwargs.pop("size", None) - broadcast_shape = kwargs.pop("broadcast_shape", None) - not_broadcast_kwargs = kwargs.pop("not_broadcast_kwargs", None) - if not_broadcast_kwargs is None: - not_broadcast_kwargs = dict() - - # Parse out raw input parameters for the generator - args = tuple(p[0] if isinstance(p, tuple) else p for p in args) - for key in kwargs: - p = kwargs[key] - kwargs[key] = p[0] if isinstance(p, tuple) else p - - # Convert size and dist_shape to tuples - size_tup = to_tuple(size) - dist_shape = to_tuple(dist_shape) - if dist_shape[: len(size_tup)] == size_tup: - # dist_shape is prepended with size_tup. This is not a consequence - # of the parameters being drawn size_tup times! By chance, the - # distribution's shape has its first elements equal to size_tup. - # This means that we must prepend the size_tup to dist_shape, and - # check if that broadcasts well with the parameters - _dist_shape = size_tup + dist_shape - else: - _dist_shape = dist_shape - - if broadcast_shape is None: - # If broadcast_shape is not explicitly provided, it is inferred as the - # broadcasted shape of the input parameter and dist_shape, taking into - # account the potential size prefix - inputs = args + tuple(kwargs.values()) - broadcast_shape = broadcast_dist_samples_shape( - [np.asarray(i).shape for i in inputs] + [_dist_shape], size=size_tup - ) - # We do this instead of broadcast_distribution_samples to avoid - # creating a dummy array with dist_shape in memory - inputs = get_broadcastable_dist_samples( - inputs, - size=size_tup, - must_bcast_with=broadcast_shape, - ) - # We modify the arguments with their broadcasted counterparts - args = tuple(inputs[: len(args)]) - for offset, key in enumerate(kwargs): - kwargs[key] = inputs[len(args) + offset] - # Update kwargs with the keyword arguments that were not broadcasted - kwargs.update(not_broadcast_kwargs) - - # We ensure that broadcast_shape is a tuple - broadcast_shape = to_tuple(broadcast_shape) - - try: - dist_bcast_shape = broadcast_dist_samples_shape( - [_dist_shape, broadcast_shape], - size=size, - ) - except (ValueError, TypeError): - raise TypeError( - """Attempted to generate values with incompatible shapes: - size: {size} - size_tup: {size_tup} - broadcast_shape[:len(size_tup)] == size_tup: {size_prepended} - dist_shape: {dist_shape} - broadcast_shape: {broadcast_shape} - """.format( - size=size, - size_tup=size_tup, - dist_shape=dist_shape, - broadcast_shape=broadcast_shape, - size_prepended=broadcast_shape[: len(size_tup)] == size_tup, - ) - ) - if dist_bcast_shape[: len(size_tup)] == size_tup: - samples = generator(size=dist_bcast_shape, *args, **kwargs) - else: - samples = generator(size=size_tup + dist_bcast_shape, *args, **kwargs) - - return np.asarray(samples) diff --git a/pymc3/distributions/mixture.py b/pymc3/distributions/mixture.py index 756269d3306..c190fad28e1 100644 --- a/pymc3/distributions/mixture.py +++ b/pymc3/distributions/mixture.py @@ -19,20 +19,9 @@ import theano.tensor 
as tt from pymc3.distributions.continuous import Normal, get_tau_sigma -from pymc3.distributions.dist_math import bound, random_choice -from pymc3.distributions.distribution import ( - Discrete, - Distribution, - _DrawValuesContext, - _DrawValuesContextBlocker, - draw_values, - generate_samples, -) -from pymc3.distributions.shape_utils import ( - broadcast_distribution_samples, - get_broadcastable_dist_samples, - to_tuple, -) +from pymc3.distributions.dist_math import bound +from pymc3.distributions.distribution import Discrete, Distribution +from pymc3.distributions.shape_utils import to_tuple from pymc3.math import logsumexp from pymc3.theanof import _conversion_map, take_along_axis @@ -314,29 +303,30 @@ def _comp_modes(self): return tt.squeeze(tt.stack([comp_dist.mode for comp_dist in self.comp_dists], axis=-1)) def _comp_samples(self, point=None, size=None, comp_dist_shapes=None, broadcast_shape=None): - if self.comp_is_distribution: - samples = self._comp_dists.random(point=point, size=size) - else: - if comp_dist_shapes is None: - comp_dist_shapes = self._comp_dist_shapes - if broadcast_shape is None: - broadcast_shape = self._sample_shape - samples = [] - for dist_shape, generator in zip(comp_dist_shapes, self._generators): - sample = generate_samples( - generator=generator, - dist_shape=dist_shape, - broadcast_shape=broadcast_shape, - point=point, - size=size, - not_broadcast_kwargs={"raw_size_": size}, - ) - samples.append(sample) - samples = np.array(broadcast_distribution_samples(samples, size=size)) - # In the logp we assume the last axis holds the mixture components - # so we move the axis to the last dimension - samples = np.moveaxis(samples, 0, -1) - return samples.astype(self.dtype) + # if self.comp_is_distribution: + # samples = self._comp_dists.random(point=point, size=size) + # else: + # if comp_dist_shapes is None: + # comp_dist_shapes = self._comp_dist_shapes + # if broadcast_shape is None: + # broadcast_shape = self._sample_shape + # samples = [] + # for dist_shape, generator in zip(comp_dist_shapes, self._generators): + # sample = generate_samples( + # generator=generator, + # dist_shape=dist_shape, + # broadcast_shape=broadcast_shape, + # point=point, + # size=size, + # not_broadcast_kwargs={"raw_size_": size}, + # ) + # samples.append(sample) + # samples = np.array(broadcast_distribution_samples(samples, size=size)) + # # In the logp we assume the last axis holds the mixture components + # # so we move the axis to the last dimension + # samples = np.moveaxis(samples, 0, -1) + # return samples.astype(self.dtype) + pass def infer_comp_dist_shapes(self, point=None): """Try to infer the shapes of the component distributions, @@ -367,48 +357,48 @@ def infer_comp_dist_shapes(self, point=None): The shape that results from broadcasting all component's shapes together. """ - if self.comp_is_distribution: - if len(self._comp_dist_shapes) > 0: - comp_dist_shapes = self._comp_dist_shapes - else: - # Happens when the distribution is a scalar or when it was not - # given a shape. In these cases we try to draw a single value - # to check its shape, we use the provided point dictionary - # hoping that it can circumvent the Flat and HalfFlat - # undrawable distributions. - with _DrawValuesContextBlocker(): - test_sample = self._comp_dists.random(point=point, size=None) - comp_dist_shapes = test_sample.shape - broadcast_shape = comp_dist_shapes - else: - # Now we check the comp_dists distribution shape, see what - # the broadcast shape would be. 
This shape will be the dist_shape - # used by generate samples (the shape of a single random sample) - # from the mixture - comp_dist_shapes = [] - for dist_shape, comp_dist in zip(self._comp_dist_shapes, self._comp_dists): - if dist_shape == tuple(): - # Happens when the distribution is a scalar or when it was - # not given a shape. In these cases we try to draw a single - # value to check its shape, we use the provided point - # dictionary hoping that it can circumvent the Flat and - # HalfFlat undrawable distributions. - with _DrawValuesContextBlocker(): - test_sample = comp_dist.random(point=point, size=None) - dist_shape = test_sample.shape - comp_dist_shapes.append(dist_shape) - # All component distributions must broadcast with each other - try: - broadcast_shape = np.broadcast( - *[np.empty(shape) for shape in comp_dist_shapes] - ).shape - except Exception: - raise TypeError( - "Inferred comp_dist shapes do not broadcast " - "with each other. comp_dists inferred shapes " - "are: {}".format(comp_dist_shapes) - ) - return comp_dist_shapes, broadcast_shape + # if self.comp_is_distribution: + # if len(self._comp_dist_shapes) > 0: + # comp_dist_shapes = self._comp_dist_shapes + # else: + # # Happens when the distribution is a scalar or when it was not + # # given a shape. In these cases we try to draw a single value + # # to check its shape, we use the provided point dictionary + # # hoping that it can circumvent the Flat and HalfFlat + # # undrawable distributions. + # with _DrawValuesContextBlocker(): + # test_sample = self._comp_dists.random(point=point, size=None) + # comp_dist_shapes = test_sample.shape + # broadcast_shape = comp_dist_shapes + # else: + # # Now we check the comp_dists distribution shape, see what + # # the broadcast shape would be. This shape will be the dist_shape + # # used by generate samples (the shape of a single random sample) + # # from the mixture + # comp_dist_shapes = [] + # for dist_shape, comp_dist in zip(self._comp_dist_shapes, self._comp_dists): + # if dist_shape == tuple(): + # # Happens when the distribution is a scalar or when it was + # # not given a shape. In these cases we try to draw a single + # # value to check its shape, we use the provided point + # # dictionary hoping that it can circumvent the Flat and + # # HalfFlat undrawable distributions. + # with _DrawValuesContextBlocker(): + # test_sample = comp_dist.random(point=point, size=None) + # dist_shape = test_sample.shape + # comp_dist_shapes.append(dist_shape) + # # All component distributions must broadcast with each other + # try: + # broadcast_shape = np.broadcast( + # *[np.empty(shape) for shape in comp_dist_shapes] + # ).shape + # except Exception: + # raise TypeError( + # "Inferred comp_dist shapes do not broadcast " + # "with each other. 
comp_dists inferred shapes " + # "are: {}".format(comp_dist_shapes) + # ) + # return comp_dist_shapes, broadcast_shape def logp(self, value): """ @@ -451,122 +441,122 @@ def random(self, point=None, size=None): ------- array """ - # Convert size to tuple - size = to_tuple(size) - # Draw mixture weights and infer the comp_dists shapes - with _DrawValuesContext() as draw_context: - # We first need to check w and comp_tmp shapes and re compute size - w = draw_values([self.w], point=point, size=size)[0] - comp_dist_shapes, broadcast_shape = self.infer_comp_dist_shapes(point=point) - - # When size is not None, it's hard to tell the w parameter shape - if size is not None and w.shape[: len(size)] == size: - w_shape = w.shape[len(size) :] - else: - w_shape = w.shape - - # Try to determine parameter shape and dist_shape - if self.comp_is_distribution: - param_shape = np.broadcast(np.empty(w_shape), np.empty(broadcast_shape)).shape - else: - param_shape = np.broadcast(np.empty(w_shape), np.empty(broadcast_shape + (1,))).shape - if np.asarray(self.shape).size != 0: - dist_shape = np.broadcast(np.empty(self.shape), np.empty(param_shape[:-1])).shape - else: - dist_shape = param_shape[:-1] - - # Try to determine the size that must be used to get the mixture - # components (i.e. get random choices using w). - # 1. There must be size independent choices based on w. - # 2. There must also be independent draws for each non singleton axis - # of w. - # 3. There must also be independent draws for each dimension added by - # self.shape with respect to the w.ndim. These usually correspond to - # observed variables with batch shapes - wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:-1] - psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:-1] - w_sample_size = [] - # Loop through the dist_shape to get the conditions 2 and 3 first - for i in range(len(dist_shape)): - if dist_shape[i] != psh[i] and wsh[i] == 1: - # self.shape[i] is a non singleton dimension (usually caused by - # observed data) - sh = dist_shape[i] - else: - sh = wsh[i] - w_sample_size.append(sh) - if size is not None and w_sample_size[: len(size)] != size: - w_sample_size = size + tuple(w_sample_size) - # Broadcast w to the w_sample_size (add a singleton last axis for the - # mixture components) - w = broadcast_distribution_samples([w, np.empty(w_sample_size + (1,))], size=size)[0] - - # Semiflatten the mixture weights. 
The last axis is the number of - # mixture mixture components, and the rest is all about size, - # dist_shape and broadcasting - w_ = np.reshape(w, (-1, w.shape[-1])) - w_samples = random_choice(p=w_, size=None) # w's shape already includes size - # Now we broadcast the chosen components to the dist_shape - w_samples = np.reshape(w_samples, w.shape[:-1]) - if size is not None and dist_shape[: len(size)] != size: - w_samples = np.broadcast_to(w_samples, size + dist_shape) - else: - w_samples = np.broadcast_to(w_samples, dist_shape) - - # When size is not None, maybe dist_shape partially overlaps with size - if size is not None: - if size == dist_shape: - size = None - elif size[-len(dist_shape) :] == dist_shape: - size = size[: len(size) - len(dist_shape)] - - # We get an integer _size instead of a tuple size for drawing the - # mixture, then we just reshape the output - if size is None: - _size = None - else: - _size = int(np.prod(size)) - - # Compute the total size of the mixture's random call with size - if _size is not None: - output_size = int(_size * np.prod(dist_shape) * param_shape[-1]) - else: - output_size = int(np.prod(dist_shape) * param_shape[-1]) - # Get the size we need for the mixture's random call - if self.comp_is_distribution: - mixture_size = int(output_size // np.prod(broadcast_shape)) - else: - mixture_size = int(output_size // (np.prod(broadcast_shape) * param_shape[-1])) - if mixture_size == 1 and _size is None: - mixture_size = None - - # Sample from the mixture - with draw_context: - mixed_samples = self._comp_samples( - point=point, - size=mixture_size, - broadcast_shape=broadcast_shape, - comp_dist_shapes=comp_dist_shapes, - ) - # Test that the mixture has the same number of "samples" as w - if w_samples.size != (mixed_samples.size // w.shape[-1]): - raise ValueError( - "Inconsistent number of samples from the " - "mixture and mixture weights. 
Drew {} mixture " - "weights elements, and {} samples from the " - "mixture components.".format(w_samples.size, mixed_samples.size // w.shape[-1]) - ) - # Semiflatten the mixture to be able to zip it with w_samples - w_samples = w_samples.flatten() - mixed_samples = np.reshape(mixed_samples, (-1, w.shape[-1])) - # Select the samples from the mixture - samples = np.array([mixed[choice] for choice, mixed in zip(w_samples, mixed_samples)]) - # Reshape the samples to the correct output shape - if size is None: - samples = np.reshape(samples, dist_shape) - else: - samples = np.reshape(samples, size + dist_shape) - return samples + # # Convert size to tuple + # size = to_tuple(size) + # # Draw mixture weights and infer the comp_dists shapes + # with _DrawValuesContext() as draw_context: + # # We first need to check w and comp_tmp shapes and re compute size + # w = draw_values([self.w], point=point, size=size)[0] + # comp_dist_shapes, broadcast_shape = self.infer_comp_dist_shapes(point=point) + # + # # When size is not None, it's hard to tell the w parameter shape + # if size is not None and w.shape[: len(size)] == size: + # w_shape = w.shape[len(size) :] + # else: + # w_shape = w.shape + # + # # Try to determine parameter shape and dist_shape + # if self.comp_is_distribution: + # param_shape = np.broadcast(np.empty(w_shape), np.empty(broadcast_shape)).shape + # else: + # param_shape = np.broadcast(np.empty(w_shape), np.empty(broadcast_shape + (1,))).shape + # if np.asarray(self.shape).size != 0: + # dist_shape = np.broadcast(np.empty(self.shape), np.empty(param_shape[:-1])).shape + # else: + # dist_shape = param_shape[:-1] + # + # # Try to determine the size that must be used to get the mixture + # # components (i.e. get random choices using w). + # # 1. There must be size independent choices based on w. + # # 2. There must also be independent draws for each non singleton axis + # # of w. + # # 3. There must also be independent draws for each dimension added by + # # self.shape with respect to the w.ndim. These usually correspond to + # # observed variables with batch shapes + # wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:-1] + # psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:-1] + # w_sample_size = [] + # # Loop through the dist_shape to get the conditions 2 and 3 first + # for i in range(len(dist_shape)): + # if dist_shape[i] != psh[i] and wsh[i] == 1: + # # self.shape[i] is a non singleton dimension (usually caused by + # # observed data) + # sh = dist_shape[i] + # else: + # sh = wsh[i] + # w_sample_size.append(sh) + # if size is not None and w_sample_size[: len(size)] != size: + # w_sample_size = size + tuple(w_sample_size) + # # Broadcast w to the w_sample_size (add a singleton last axis for the + # # mixture components) + # w = broadcast_distribution_samples([w, np.empty(w_sample_size + (1,))], size=size)[0] + # + # # Semiflatten the mixture weights. 
The last axis is the number of + # # mixture mixture components, and the rest is all about size, + # # dist_shape and broadcasting + # w_ = np.reshape(w, (-1, w.shape[-1])) + # w_samples = random_choice(p=w_, size=None) # w's shape already includes size + # # Now we broadcast the chosen components to the dist_shape + # w_samples = np.reshape(w_samples, w.shape[:-1]) + # if size is not None and dist_shape[: len(size)] != size: + # w_samples = np.broadcast_to(w_samples, size + dist_shape) + # else: + # w_samples = np.broadcast_to(w_samples, dist_shape) + # + # # When size is not None, maybe dist_shape partially overlaps with size + # if size is not None: + # if size == dist_shape: + # size = None + # elif size[-len(dist_shape) :] == dist_shape: + # size = size[: len(size) - len(dist_shape)] + # + # # We get an integer _size instead of a tuple size for drawing the + # # mixture, then we just reshape the output + # if size is None: + # _size = None + # else: + # _size = int(np.prod(size)) + # + # # Compute the total size of the mixture's random call with size + # if _size is not None: + # output_size = int(_size * np.prod(dist_shape) * param_shape[-1]) + # else: + # output_size = int(np.prod(dist_shape) * param_shape[-1]) + # # Get the size we need for the mixture's random call + # if self.comp_is_distribution: + # mixture_size = int(output_size // np.prod(broadcast_shape)) + # else: + # mixture_size = int(output_size // (np.prod(broadcast_shape) * param_shape[-1])) + # if mixture_size == 1 and _size is None: + # mixture_size = None + # + # # Sample from the mixture + # with draw_context: + # mixed_samples = self._comp_samples( + # point=point, + # size=mixture_size, + # broadcast_shape=broadcast_shape, + # comp_dist_shapes=comp_dist_shapes, + # ) + # # Test that the mixture has the same number of "samples" as w + # if w_samples.size != (mixed_samples.size // w.shape[-1]): + # raise ValueError( + # "Inconsistent number of samples from the " + # "mixture and mixture weights. Drew {} mixture " + # "weights elements, and {} samples from the " + # "mixture components.".format(w_samples.size, mixed_samples.size // w.shape[-1]) + # ) + # # Semiflatten the mixture to be able to zip it with w_samples + # w_samples = w_samples.flatten() + # mixed_samples = np.reshape(mixed_samples, (-1, w.shape[-1])) + # # Select the samples from the mixture + # samples = np.array([mixed[choice] for choice, mixed in zip(w_samples, mixed_samples)]) + # # Reshape the samples to the correct output shape + # if size is None: + # samples = np.reshape(samples, dist_shape) + # else: + # samples = np.reshape(samples, size + dist_shape) + # return samples def _distr_parameters_for_repr(self): return [] @@ -779,95 +769,95 @@ def random(self, point=None, size=None): ------- array """ - sample_shape = to_tuple(size) - mixture_axis = self.mixture_axis - - # First we draw values for the mixture component weights - (w,) = draw_values([self.w], point=point, size=size) - - # We now draw random choices from those weights. - # However, we have to ensure that the number of choices has the - # sample_shape present. 
- w_shape = w.shape - batch_shape = self.comp_dists.shape[: mixture_axis + 1] - param_shape = np.broadcast(np.empty(w_shape), np.empty(batch_shape)).shape - event_shape = self.comp_dists.shape[mixture_axis + 1 :] - - if np.asarray(self.shape).size != 0: - comp_dists_ndim = len(self.comp_dists.shape) - - # If event_shape of both comp_dists and supplied shape matches, - # broadcast only batch_shape - # else broadcast the entire given shape with batch_shape. - if list(self.shape[mixture_axis - comp_dists_ndim + 1 :]) == list(event_shape): - dist_shape = np.broadcast( - np.empty(self.shape[:mixture_axis]), np.empty(param_shape[:mixture_axis]) - ).shape - else: - dist_shape = np.broadcast( - np.empty(self.shape), np.empty(param_shape[:mixture_axis]) - ).shape - else: - dist_shape = param_shape[:mixture_axis] - - # Try to determine the size that must be used to get the mixture - # components (i.e. get random choices using w). - # 1. There must be size independent choices based on w. - # 2. There must also be independent draws for each non singleton axis - # of w. - # 3. There must also be independent draws for each dimension added by - # self.shape with respect to the w.ndim. These usually correspond to - # observed variables with batch shapes - wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:mixture_axis] - psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:mixture_axis] - w_sample_size = [] - # Loop through the dist_shape to get the conditions 2 and 3 first - for i in range(len(dist_shape)): - if dist_shape[i] != psh[i] and wsh[i] == 1: - # self.shape[i] is a non singleton dimension (usually caused by - # observed data) - sh = dist_shape[i] - else: - sh = wsh[i] - w_sample_size.append(sh) - - if sample_shape is not None and w_sample_size[: len(sample_shape)] != sample_shape: - w_sample_size = sample_shape + tuple(w_sample_size) - - choices = random_choice(p=w, size=w_sample_size) - - # We now draw samples from the mixture components random method - comp_samples = self.comp_dists.random(point=point, size=size) - if comp_samples.shape[: len(sample_shape)] != sample_shape: - comp_samples = np.broadcast_to( - comp_samples, - shape=sample_shape + comp_samples.shape, - ) - - # At this point the shapes of the arrays involved are: - # comp_samples.shape = (sample_shape, batch_shape, mixture_axis, event_shape) - # choices.shape = (sample_shape, batch_shape) + # sample_shape = to_tuple(size) + # mixture_axis = self.mixture_axis # - # To be able to take the choices along the mixture_axis of the - # comp_samples, we have to add in dimensions to the right of the - # choices array. - # We also need to make sure that the batch_shapes of both the comp_samples - # and choices broadcast with each other. - - choices = np.reshape(choices, choices.shape + (1,) * (1 + len(event_shape))) - - choices, comp_samples = get_broadcastable_dist_samples([choices, comp_samples], size=size) - - # We now take the choices of the mixture components along the mixture_axis - # but we use the negative index representation to be able to handle the - # sample_shape - samples = np.take_along_axis( - comp_samples, choices, axis=mixture_axis - len(self.comp_dists.shape) - ) - - # The `samples` array still has the `mixture_axis`, so we must remove it: - output = samples[(..., 0) + (slice(None),) * len(event_shape)] - return output + # # First we draw values for the mixture component weights + # (w,) = draw_values([self.w], point=point, size=size) + # + # # We now draw random choices from those weights. 
+ # # However, we have to ensure that the number of choices has the + # # sample_shape present. + # w_shape = w.shape + # batch_shape = self.comp_dists.shape[: mixture_axis + 1] + # param_shape = np.broadcast(np.empty(w_shape), np.empty(batch_shape)).shape + # event_shape = self.comp_dists.shape[mixture_axis + 1 :] + # + # if np.asarray(self.shape).size != 0: + # comp_dists_ndim = len(self.comp_dists.shape) + # + # # If event_shape of both comp_dists and supplied shape matches, + # # broadcast only batch_shape + # # else broadcast the entire given shape with batch_shape. + # if list(self.shape[mixture_axis - comp_dists_ndim + 1 :]) == list(event_shape): + # dist_shape = np.broadcast( + # np.empty(self.shape[:mixture_axis]), np.empty(param_shape[:mixture_axis]) + # ).shape + # else: + # dist_shape = np.broadcast( + # np.empty(self.shape), np.empty(param_shape[:mixture_axis]) + # ).shape + # else: + # dist_shape = param_shape[:mixture_axis] + # + # # Try to determine the size that must be used to get the mixture + # # components (i.e. get random choices using w). + # # 1. There must be size independent choices based on w. + # # 2. There must also be independent draws for each non singleton axis + # # of w. + # # 3. There must also be independent draws for each dimension added by + # # self.shape with respect to the w.ndim. These usually correspond to + # # observed variables with batch shapes + # wsh = (1,) * (len(dist_shape) - len(w_shape) + 1) + w_shape[:mixture_axis] + # psh = (1,) * (len(dist_shape) - len(param_shape) + 1) + param_shape[:mixture_axis] + # w_sample_size = [] + # # Loop through the dist_shape to get the conditions 2 and 3 first + # for i in range(len(dist_shape)): + # if dist_shape[i] != psh[i] and wsh[i] == 1: + # # self.shape[i] is a non singleton dimension (usually caused by + # # observed data) + # sh = dist_shape[i] + # else: + # sh = wsh[i] + # w_sample_size.append(sh) + # + # if sample_shape is not None and w_sample_size[: len(sample_shape)] != sample_shape: + # w_sample_size = sample_shape + tuple(w_sample_size) + # + # choices = random_choice(p=w, size=w_sample_size) + # + # # We now draw samples from the mixture components random method + # comp_samples = self.comp_dists.random(point=point, size=size) + # if comp_samples.shape[: len(sample_shape)] != sample_shape: + # comp_samples = np.broadcast_to( + # comp_samples, + # shape=sample_shape + comp_samples.shape, + # ) + # + # # At this point the shapes of the arrays involved are: + # # comp_samples.shape = (sample_shape, batch_shape, mixture_axis, event_shape) + # # choices.shape = (sample_shape, batch_shape) + # # + # # To be able to take the choices along the mixture_axis of the + # # comp_samples, we have to add in dimensions to the right of the + # # choices array. + # # We also need to make sure that the batch_shapes of both the comp_samples + # # and choices broadcast with each other. 
+ # + # choices = np.reshape(choices, choices.shape + (1,) * (1 + len(event_shape))) + # + # choices, comp_samples = get_broadcastable_dist_samples([choices, comp_samples], size=size) + # + # # We now take the choices of the mixture components along the mixture_axis + # # but we use the negative index representation to be able to handle the + # # sample_shape + # samples = np.take_along_axis( + # comp_samples, choices, axis=mixture_axis - len(self.comp_dists.shape) + # ) + # + # # The `samples` array still has the `mixture_axis`, so we must remove it: + # output = samples[(..., 0) + (slice(None),) * len(event_shape)] + # return output def _distr_parameters_for_repr(self): return [] diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py index bf8745c6531..4a77ffa9b0c 100755 --- a/pymc3/distributions/multivariate.py +++ b/pymc3/distributions/multivariate.py @@ -36,18 +36,10 @@ from pymc3.distributions import _logp, transforms from pymc3.distributions.continuous import ChiSquared, Normal from pymc3.distributions.dist_math import bound, factln, logpow -from pymc3.distributions.distribution import ( - Continuous, - Discrete, - _DrawValuesContext, - draw_values, - generate_samples, -) -from pymc3.distributions.shape_utils import broadcast_dist_samples_to, to_tuple +from pymc3.distributions.distribution import Continuous, Discrete +from pymc3.distributions.shape_utils import to_tuple from pymc3.distributions.special import gammaln, multigammaln -from pymc3.exceptions import ShapeError from pymc3.math import kron_diag, kron_dot, kron_solve_lower, kronecker -from pymc3.model import Deterministic from pymc3.theanof import floatX, intX __all__ = [ @@ -265,36 +257,36 @@ def random(self, point=None, size=None): ------- array """ - size = to_tuple(size) - - param_attribute = getattr(self, "chol_cov" if self._cov_type == "chol" else self._cov_type) - mu, param = draw_values([self.mu, param_attribute], point=point, size=size) - - dist_shape = to_tuple(self.shape) - output_shape = size + dist_shape - - # Simple, there can be only be 1 batch dimension, only available from `mu`. - # Insert it into `param` before events, if there is a sample shape in front. - if param.ndim > 2 and dist_shape[:-1]: - param = param.reshape(size + (1,) + param.shape[-2:]) - - mu = broadcast_dist_samples_to(to_shape=output_shape, samples=[mu], size=size)[0] - param = np.broadcast_to(param, shape=output_shape + dist_shape[-1:]) - - assert mu.shape == output_shape - assert param.shape == output_shape + dist_shape[-1:] - - if self._cov_type == "cov": - chol = np.linalg.cholesky(param) - elif self._cov_type == "chol": - chol = param - else: # tau -> chol -> swapaxes (chol, -1, -2) -> inv ... - lower_chol = np.linalg.cholesky(param) - upper_chol = np.swapaxes(lower_chol, -1, -2) - chol = np.linalg.inv(upper_chol) - - standard_normal = np.random.standard_normal(output_shape) - return mu + np.einsum("...ij,...j->...i", chol, standard_normal) + # size = to_tuple(size) + # + # param_attribute = getattr(self, "chol_cov" if self._cov_type == "chol" else self._cov_type) + # mu, param = draw_values([self.mu, param_attribute], point=point, size=size) + # + # dist_shape = to_tuple(self.shape) + # output_shape = size + dist_shape + # + # # Simple, there can be only be 1 batch dimension, only available from `mu`. + # # Insert it into `param` before events, if there is a sample shape in front. 
+ # if param.ndim > 2 and dist_shape[:-1]: + # param = param.reshape(size + (1,) + param.shape[-2:]) + # + # mu = broadcast_dist_samples_to(to_shape=output_shape, samples=[mu], size=size)[0] + # param = np.broadcast_to(param, shape=output_shape + dist_shape[-1:]) + # + # assert mu.shape == output_shape + # assert param.shape == output_shape + dist_shape[-1:] + # + # if self._cov_type == "cov": + # chol = np.linalg.cholesky(param) + # elif self._cov_type == "chol": + # chol = param + # else: # tau -> chol -> swapaxes (chol, -1, -2) -> inv ... + # lower_chol = np.linalg.cholesky(param) + # upper_chol = np.swapaxes(lower_chol, -1, -2) + # chol = np.linalg.inv(upper_chol) + # + # standard_normal = np.random.standard_normal(output_shape) + # return mu + np.einsum("...ij,...j->...i", chol, standard_normal) def logp(self, value): """ @@ -388,24 +380,24 @@ def random(self, point=None, size=None): ------- array """ - with _DrawValuesContext(): - nu, mu = draw_values([self.nu, self.mu], point=point, size=size) - if self._cov_type == "cov": - (cov,) = draw_values([self.cov], point=point, size=size) - dist = MvNormal.dist(mu=np.zeros_like(mu), cov=cov, shape=self.shape) - elif self._cov_type == "tau": - (tau,) = draw_values([self.tau], point=point, size=size) - dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau, shape=self.shape) - else: - (chol,) = draw_values([self.chol_cov], point=point, size=size) - dist = MvNormal.dist(mu=np.zeros_like(mu), chol=chol, shape=self.shape) - - samples = dist.random(point, size) - - chi2_samples = np.random.chisquare(nu, size) - # Add distribution shape to chi2 samples - chi2_samples = chi2_samples.reshape(chi2_samples.shape + (1,) * len(self.shape)) - return (samples / np.sqrt(chi2_samples / nu)) + mu + # with _DrawValuesContext(): + # nu, mu = draw_values([self.nu, self.mu], point=point, size=size) + # if self._cov_type == "cov": + # (cov,) = draw_values([self.cov], point=point, size=size) + # dist = MvNormal.dist(mu=np.zeros_like(mu), cov=cov, shape=self.shape) + # elif self._cov_type == "tau": + # (tau,) = draw_values([self.tau], point=point, size=size) + # dist = MvNormal.dist(mu=np.zeros_like(mu), tau=tau, shape=self.shape) + # else: + # (chol,) = draw_values([self.chol_cov], point=point, size=size) + # dist = MvNormal.dist(mu=np.zeros_like(mu), chol=chol, shape=self.shape) + # + # samples = dist.random(point, size) + # + # chi2_samples = np.random.chisquare(nu, size) + # # Add distribution shape to chi2 samples + # chi2_samples = chi2_samples.reshape(chi2_samples.shape + (1,) * len(self.shape)) + # return (samples / np.sqrt(chi2_samples / nu)) + mu def logp(self, value): """ @@ -606,16 +598,16 @@ def random(self, point=None, size=None): ------- array """ - n, p = draw_values([self.n, self.p], point=point, size=size) - samples = generate_samples( - self._random, - n, - p, - dist_shape=self.shape, - not_broadcast_kwargs={"raw_size": size}, - size=size, - ) - return samples + # n, p = draw_values([self.n, self.p], point=point, size=size) + # samples = generate_samples( + # self._random, + # n, + # p, + # dist_shape=self.shape, + # not_broadcast_kwargs={"raw_size": size}, + # size=size, + # ) + # return samples def logp(self, x): """ @@ -742,26 +734,26 @@ def random(self, point=None, size=None): ------- array """ - n, a = draw_values([self.n, self.a], point=point, size=size) - samples = generate_samples( - self._random, - n, - a, - dist_shape=self.shape, - size=size, - ) - - # If distribution is initialized with .dist(), valid init shape is not asserted. 
- # Under normal use in a model context valid init shape is asserted at start. - expected_shape = to_tuple(size) + to_tuple(self.shape) - sample_shape = tuple(samples.shape) - if sample_shape != expected_shape: - raise ShapeError( - f"Expected sample shape was {expected_shape} but got {sample_shape}. " - "This may reflect an invalid initialization shape." - ) - - return samples + # n, a = draw_values([self.n, self.a], point=point, size=size) + # samples = generate_samples( + # self._random, + # n, + # a, + # dist_shape=self.shape, + # size=size, + # ) + # + # # If distribution is initialized with .dist(), valid init shape is not asserted. + # # Under normal use in a model context valid init shape is asserted at start. + # expected_shape = to_tuple(size) + to_tuple(self.shape) + # sample_shape = tuple(samples.shape) + # if sample_shape != expected_shape: + # raise ShapeError( + # f"Expected sample shape was {expected_shape} but got {sample_shape}. " + # "This may reflect an invalid initialization shape." + # ) + # + # return samples def logp(self, value): """ @@ -920,9 +912,9 @@ def random(self, point=None, size=None): ------- array """ - nu, V = draw_values([self.nu, self.V], point=point, size=size) - size = 1 if size is None else size - return generate_samples(stats.wishart.rvs, nu.item(), V, broadcast_shape=(size,)) + # nu, V = draw_values([self.nu, self.V], point=point, size=size) + # size = 1 if size is None else size + # return generate_samples(stats.wishart.rvs, nu.item(), V, broadcast_shape=(size,)) def logp(self, X): """ @@ -1038,9 +1030,9 @@ def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testv # L * A * A.T * L.T ~ Wishart(L*L.T, nu) if return_cholesky: - return Deterministic(name, tt.dot(L, A)) + return pm.Deterministic(name, tt.dot(L, A)) else: - return Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T)) + return pm.Deterministic(name, tt.dot(tt.dot(tt.dot(L, A), A.T), L.T)) def _lkj_normalizing_constant(eta, n): @@ -1198,45 +1190,45 @@ def random(self, point=None, size=None): ------- array """ - # Get parameters and broadcast them - n, eta = draw_values([self.n, self.eta], point=point, size=size) - broadcast_shape = np.broadcast(n, eta).shape - # We can only handle cov matrices with a constant n per random call - n = np.unique(n) - if len(n) > 1: - raise RuntimeError("Varying n is not supported for LKJCholeskyCov") - n = int(n[0]) - dist_shape = ((n * (n + 1)) // 2,) - # We make sure that eta and the drawn n get their shapes broadcasted - eta = np.broadcast_to(eta, broadcast_shape) - # We change the size of the draw depending on the broadcast shape - sample_shape = broadcast_shape + dist_shape - if size is not None: - if not isinstance(size, tuple): - try: - size = tuple(size) - except TypeError: - size = (size,) - if size == sample_shape: - size = None - elif size == broadcast_shape: - size = None - elif size[-len(sample_shape) :] == sample_shape: - size = size[: len(size) - len(sample_shape)] - elif size[-len(broadcast_shape) :] == broadcast_shape: - size = size[: len(size) - len(broadcast_shape)] - # We will always provide _random with an integer size and then reshape - # the output to get the correct size - if size is not None: - _size = np.prod(size) - else: - _size = 1 - samples = self._random(n, eta, size=_size) - if size is None: - samples = samples[0] - else: - samples = np.reshape(samples, size + sample_shape) - return samples + # # Get parameters and broadcast them + # n, eta = draw_values([self.n, self.eta], point=point, 
size=size) + # broadcast_shape = np.broadcast(n, eta).shape + # # We can only handle cov matrices with a constant n per random call + # n = np.unique(n) + # if len(n) > 1: + # raise RuntimeError("Varying n is not supported for LKJCholeskyCov") + # n = int(n[0]) + # dist_shape = ((n * (n + 1)) // 2,) + # # We make sure that eta and the drawn n get their shapes broadcasted + # eta = np.broadcast_to(eta, broadcast_shape) + # # We change the size of the draw depending on the broadcast shape + # sample_shape = broadcast_shape + dist_shape + # if size is not None: + # if not isinstance(size, tuple): + # try: + # size = tuple(size) + # except TypeError: + # size = (size,) + # if size == sample_shape: + # size = None + # elif size == broadcast_shape: + # size = None + # elif size[-len(sample_shape) :] == sample_shape: + # size = size[: len(size) - len(sample_shape)] + # elif size[-len(broadcast_shape) :] == broadcast_shape: + # size = size[: len(size) - len(broadcast_shape)] + # # We will always provide _random with an integer size and then reshape + # # the output to get the correct size + # if size is not None: + # _size = np.prod(size) + # else: + # _size = 1 + # samples = self._random(n, eta, size=_size) + # if size is None: + # samples = samples[0] + # else: + # samples = np.reshape(samples, size + sample_shape) + # return samples def _distr_parameters_for_repr(self): return ["eta", "n"] @@ -1511,10 +1503,10 @@ def random(self, point=None, size=None): ------- array """ - n, eta = draw_values([self.n, self.eta], point=point, size=size) - size = 1 if size is None else size - samples = generate_samples(self._random, n, eta, broadcast_shape=(size,)) - return samples + # n, eta = draw_values([self.n, self.eta], point=point, size=size) + # size = 1 if size is None else size + # samples = generate_samples(self._random, n, eta, broadcast_shape=(size,)) + # return samples def logp(self, x): """ @@ -1746,23 +1738,23 @@ def random(self, point=None, size=None): ------- array """ - mu, colchol, rowchol = draw_values( - [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size - ) - size = to_tuple(size) - dist_shape = to_tuple(self.shape) - output_shape = size + dist_shape - - # Broadcasting all parameters - (mu,) = broadcast_dist_samples_to(to_shape=output_shape, samples=[mu], size=size) - rowchol = np.broadcast_to(rowchol, shape=size + rowchol.shape[-2:]) - - colchol = np.broadcast_to(colchol, shape=size + colchol.shape[-2:]) - colchol = np.swapaxes(colchol, -1, -2) # Take transpose - - standard_normal = np.random.standard_normal(output_shape) - samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol)) - return samples + # mu, colchol, rowchol = draw_values( + # [self.mu, self.colchol_cov, self.rowchol_cov], point=point, size=size + # ) + # size = to_tuple(size) + # dist_shape = to_tuple(self.shape) + # output_shape = size + dist_shape + # + # # Broadcasting all parameters + # (mu,) = broadcast_dist_samples_to(to_shape=output_shape, samples=[mu], size=size) + # rowchol = np.broadcast_to(rowchol, shape=size + rowchol.shape[-2:]) + # + # colchol = np.broadcast_to(colchol, shape=size + colchol.shape[-2:]) + # colchol = np.swapaxes(colchol, -1, -2) # Take transpose + # + # standard_normal = np.random.standard_normal(output_shape) + # samples = mu + np.matmul(rowchol, np.matmul(standard_normal, colchol)) + # return samples def _trquaddist(self, value): """Compute Tr[colcov^-1 @ (x - mu).T @ rowcov^-1 @ (x - mu)] and diff --git a/pymc3/distributions/posterior_predictive.py 
b/pymc3/distributions/posterior_predictive.py deleted file mode 100644 index 31aa3e40f58..00000000000 --- a/pymc3/distributions/posterior_predictive.py +++ /dev/null @@ -1,698 +0,0 @@ -from __future__ import annotations - -import contextvars -import logging -import numbers -import warnings - -from collections import UserDict -from contextlib import AbstractContextManager -from typing import TYPE_CHECKING, Any, Callable, Dict, List, cast, overload - -import numpy as np -import theano.graph.basic -import theano.graph.fg -import theano.tensor as tt - -from arviz import InferenceData -from typing_extensions import Literal, Protocol -from xarray import Dataset - -from pymc3.backends.base import MultiTrace -from pymc3.distributions.distribution import ( - _compile_theano_function, - _DrawValuesContext, - _DrawValuesContextBlocker, - is_fast_drawable, - vectorized_ppc, -) -from pymc3.exceptions import IncorrectArgumentsError -from pymc3.model import ( - Model, - MultiObservedRV, - ObservedRV, - get_named_nodes_and_relations, - modelcontext, -) -from pymc3.util import chains_and_samples, dataset_to_point_list, get_var_name -from pymc3.vartypes import theano_constant - -# Failing tests: -# test_mixture_random_shape::test_mixture_random_shape -# - -Point = Dict[str, np.ndarray] - - -class HasName(Protocol): - name: str - - -class _TraceDict(UserDict): - """This class extends the standard trace-based representation - of traces by adding some helpful attributes used in posterior predictive - sampling. - - Attributes - ~~~~~~~~~~ - varnames: list of strings""" - - varnames: list[str] - _len: int - data: Point - - def __init__( - self, - point_list: list[Point] | None = None, - multi_trace: MultiTrace | None = None, - dict_: Point | None = None, - ): - """""" - if multi_trace: - assert point_list is None and dict_ is None - self.data = {} - self._len = sum(len(multi_trace._straces[chain]) for chain in multi_trace.chains) - self.varnames = multi_trace.varnames - for vn in multi_trace.varnames: - self.data[vn] = multi_trace.get_values(vn) - if point_list is not None: - assert multi_trace is None and dict_ is None - self.varnames = varnames = list(point_list[0].keys()) - rep_values = [point_list[0][varname] for varname in varnames] - # translate the point list. - self._len = num_points = len(point_list) - - def arr_for(val): - if np.isscalar(val): - return np.ndarray(shape=(num_points,)) - elif isinstance(val, np.ndarray): - shp = (num_points,) + val.shape - return np.ndarray(shape=shp) - else: - raise TypeError( - "Illegal object %s of type %s as value of variable in point list." - % (val, type(val)) - ) - - self.data = {name: arr_for(val) for name, val in zip(varnames, rep_values)} - for i, point in enumerate(point_list): - for var, value in point.items(): - self.data[var][i] = value - if dict_ is not None: - assert point_list is None and multi_trace is None - self.data = dict_ - self.varnames = list(dict_.keys()) - self._len = dict_[self.varnames[0]].shape[0] - assert self.varnames is not None and self._len is not None and self.data is not None - - def __len__(self) -> int: - return self._len - - def _extract_slice(self, slc: slice) -> _TraceDict: - sliced_dict: Point = {} - - def apply_slice(arr: np.ndarray) -> np.ndarray: - if len(arr.shape) == 1: - return arr[slc] - else: - return arr[slc, :] - - for vn, arr in self.data.items(): - sliced_dict[vn] = apply_slice(arr) - return _TraceDict(dict_=sliced_dict) - - @overload - def __getitem__(self, item: str | HasName) -> np.ndarray: - ... 
- - @overload - def __getitem__(self, item: slice | int) -> _TraceDict: - ... - - def __getitem__(self, item): - if isinstance(item, str): - return super().__getitem__(item) - elif isinstance(item, slice): - return self._extract_slice(item) - elif isinstance(item, int): - return _TraceDict(dict_={k: np.atleast_1d(v[item]) for k, v in self.data.items()}) - elif hasattr(item, "name"): - return super().__getitem__(item.name) - else: - raise IndexError("Illegal index %s for _TraceDict" % str(item)) - - -def fast_sample_posterior_predictive( - trace: MultiTrace | Dataset | InferenceData | list[dict[str, np.ndarray]], - samples: int | None = None, - model: Model | None = None, - var_names: list[str] | None = None, - keep_size: bool = False, - random_seed=None, -) -> dict[str, np.ndarray]: - """Generate posterior predictive samples from a model given a trace. - - This is a vectorized alternative to the standard ``sample_posterior_predictive`` function. - It aims to be as compatible as possible with the original API, and is significantly - faster. Both posterior predictive sampling functions have some remaining issues, and - we encourage users to verify agreement across the results of both functions for the time - being. - - Parameters - ---------- - trace: MultiTrace, xarray.Dataset, InferenceData, or List of points (dictionary) - Trace generated from MCMC sampling. - samples: int, optional - Number of posterior predictive samples to generate. Defaults to one posterior predictive - sample per posterior sample, that is, the number of draws times the number of chains. It - is not recommended to modify this value; when modified, some chains may not be represented - in the posterior predictive sample. - model: Model (optional if in `with` context) - Model used to generate `trace` - var_names: Iterable[str] - List of vars to sample. - keep_size: bool, optional - Force posterior predictive sample to have the same shape as posterior and sample stats - data: ``(nchains, ndraws, ...)``. - random_seed: int - Seed for the random number generator. - - Returns - ------- - samples: dict - Dictionary with the variable names as keys, and values numpy arrays containing - posterior predictive samples. - """ - - ### Implementation note: primarily this function canonicalizes the arguments: - ### Establishing the model context, wrangling the number of samples, - ### Canonicalizing the trace argument into a _TraceDict object and fitting it - ### to the requested number of samples. Then it invokes posterior_predictive_draw_values - ### *repeatedly*. It does this repeatedly, because the trace argument is set up to be - ### the same as the number of samples. So if the number of samples requested is - ### greater than the number of samples in the trace parameter, we sample repeatedly. This - ### makes the shape issues just a little easier to deal with. - - if isinstance(trace, InferenceData): - nchains, ndraws = chains_and_samples(trace) - trace = dataset_to_point_list(trace.posterior) - elif isinstance(trace, Dataset): - nchains, ndraws = chains_and_samples(trace) - trace = dataset_to_point_list(trace) - elif isinstance(trace, MultiTrace): - nchains = trace.nchains - ndraws = len(trace) - else: - if keep_size: - # arguably this should be just a warning. 
- raise IncorrectArgumentsError( - "For keep_size, cannot identify chains and length from %s.", trace - ) - - model = modelcontext(model) - assert model is not None - - if model.potentials: - warnings.warn( - "The effect of Potentials on other parameters is ignored during posterior predictive sampling. " - "This is likely to lead to invalid or biased predictive samples.", - UserWarning, - ) - - with model: - - if keep_size and samples is not None: - raise IncorrectArgumentsError("Should not specify both keep_size and samples arguments") - - if isinstance(trace, list) and all(isinstance(x, dict) for x in trace): - _trace = _TraceDict(point_list=trace) - elif isinstance(trace, MultiTrace): - _trace = _TraceDict(multi_trace=trace) - else: - raise TypeError( - "Unable to generate posterior predictive samples from argument of type %s" - % type(trace) - ) - - len_trace = len(_trace) - - assert isinstance(_trace, _TraceDict) - - _samples: list[int] = [] - # temporary replacement for more complicated logic. - max_samples: int = len_trace - if samples is None or samples == max_samples: - _samples = [max_samples] - elif samples < max_samples: - warnings.warn( - "samples parameter is smaller than nchains times ndraws, some draws " - "and/or chains may not be represented in the returned posterior " - "predictive sample" - ) - # if this is less than the number of samples in the trace, take a slice and - # work with that. - _trace = _trace[slice(samples)] - _samples = [samples] - elif samples > max_samples: - full, rem = divmod(samples, max_samples) - _samples = (full * [max_samples]) + ([rem] if rem != 0 else []) - else: - raise IncorrectArgumentsError( - "Unexpected combination of samples (%s) and max_samples (%d)" - % (samples, max_samples) - ) - - if var_names is None: - vars = model.observed_RVs - else: - vars = [model[x] for x in var_names] - - if random_seed is not None: - np.random.seed(random_seed) - - if TYPE_CHECKING: - _ETPParent = UserDict[str, np.ndarray] # this is only processed by mypy - else: - # this is not seen by mypy but will be executed at runtime. - _ETPParent = UserDict - - class _ExtendableTrace(_ETPParent): - def extend_trace(self, trace: dict[str, np.ndarray]) -> None: - for k, v in trace.items(): - if k in self.data: - self.data[k] = np.concatenate((self.data[k], v)) - else: - self.data[k] = v - - ppc_trace = _ExtendableTrace() - for s in _samples: - strace = _trace if s == len_trace else _trace[slice(0, s)] - try: - values = posterior_predictive_draw_values(cast(List[Any], vars), strace, s) - new_trace: dict[str, np.ndarray] = {k.name: v for (k, v) in zip(vars, values)} - ppc_trace.extend_trace(new_trace) - except KeyboardInterrupt: - pass - - if keep_size: - return {k: ary.reshape((nchains, ndraws, *ary.shape[1:])) for k, ary in ppc_trace.items()} - # this gets us a Dict[str, np.ndarray] instead of my wrapped equiv. - return ppc_trace.data - - -def posterior_predictive_draw_values( - vars: list[Any], trace: _TraceDict, samples: int -) -> list[np.ndarray]: - with _PosteriorPredictiveSampler(vars, trace, samples, None) as sampler: - return sampler.draw_values() - - -class _PosteriorPredictiveSampler(AbstractContextManager): - """The process of posterior predictive sampling is quite complicated so this provides a central data store.""" - - # inputs - vars: list[Any] - trace: _TraceDict - samples: int - size: int | None # not supported! 
- - # other slots - logger: logging.Logger - - # for the search - evaluated: dict[int, np.ndarray] - symbolic_params: list[tuple[int, Any]] - - # set by make_graph... - leaf_nodes: dict[str, Any] - named_nodes_parents: dict[str, Any] - named_nodes_children: dict[str, Any] - _tok: contextvars.Token - - def __init__(self, vars, trace: _TraceDict, samples, model: Model | None, size=None): - if size is not None: - raise NotImplementedError( - "sample_posterior_predictive does not support the size argument at this time." - ) - assert vars is not None - self.vars = vars - self.trace = trace - self.samples = samples - self.size = size - self.logger = logging.getLogger("posterior_predictive") - - def __enter__(self) -> "_PosteriorPredictiveSampler": - self._tok = vectorized_ppc.set(posterior_predictive_draw_values) - return self - - def __exit__(self, exc_type, exc_val, exc_tb) -> Literal[False]: - vectorized_ppc.reset(self._tok) - return False - - def draw_values(self) -> list[np.ndarray]: - vars = self.vars - trace = self.trace - samples = self.samples - # size = self.size - params = dict(enumerate(vars)) - - with _DrawValuesContext() as context: - self.init() - self.make_graph() - - drawn = context.drawn_vars - - # Init givens and the stack of nodes to try to `_draw_value` from - givens = { - p.name: (p, v) - for (p, samples), v in drawn.items() - if getattr(p, "name", None) is not None - } - stack = list(self.leaf_nodes.values()) # A queue would be more appropriate - - while stack: - next_ = stack.pop(0) - if (next_, samples) in drawn: - # If the node already has a givens value, skip it - continue - elif isinstance(next_, (theano_constant, tt.sharedvar.SharedVariable)): - # If the node is a theano.tensor.TensorConstant or a - # theano.tensor.sharedvar.SharedVariable, its value will be - # available automatically in _compile_theano_function so - # we can skip it. Furthermore, if this node was treated as a - # TensorVariable that should be compiled by theano in - # _compile_theano_function, it would raise a `TypeError: - # ('Constants not allowed in param list', ...)` for - # TensorConstant, and a `TypeError: Cannot use a shared - # variable (...) as explicit input` for SharedVariable. - # ObservedRV and MultiObservedRV instances are ViewOPs - # of TensorConstants or SharedVariables, we must add them - # to the stack or risk evaluating deterministics with the - # wrong values (issue #3354) - stack.extend( - [ - node - for node in self.named_nodes_parents[next_] - if isinstance(node, (ObservedRV, MultiObservedRV)) - and (node, samples) not in drawn - ] - ) - continue - else: - # If the node does not have a givens value, try to draw it. - # The named node's children givens values must also be taken - # into account. - children = self.named_nodes_children[next_] - temp_givens = [givens[k] for k in givens if k in children] - try: - # This may fail for autotransformed RVs, which don't - # have the random method - value = self.draw_value(next_, trace=trace, givens=temp_givens) - assert isinstance(value, np.ndarray) - givens[next_.name] = (next_, value) - drawn[(next_, samples)] = value - except theano.graph.fg.MissingInputError: - # The node failed, so we must add the node's parents to - # the stack of nodes to try to draw from. We exclude the - # nodes in the `params` list. 
- stack.extend( - [ - node - for node in self.named_nodes_parents[next_] - if node is not None and (node, samples) not in drawn - ] - ) - - # the below makes sure the graph is evaluated in order - # test_distributions_random::TestDrawValues::test_draw_order fails without it - # The remaining params that must be drawn are all hashable - to_eval: set[int] = set() - missing_inputs: set[int] = {j for j, p in self.symbolic_params} - - while to_eval or missing_inputs: - if to_eval == missing_inputs: - raise ValueError( - "Cannot resolve inputs for {}".format( - [get_var_name(trace.varnames[j]) for j in to_eval] - ) - ) - to_eval = set(missing_inputs) - missing_inputs = set() - for param_idx in to_eval: - param = vars[param_idx] - drawn = context.drawn_vars - if (param, samples) in drawn: - self.evaluated[param_idx] = drawn[(param, samples)] - else: - try: - if param in self.named_nodes_children: - for node in self.named_nodes_children[param]: - if node.name not in givens and (node, samples) in drawn: - givens[node.name] = ( - node, - drawn[(node, samples)], - ) - value = self.draw_value(param, trace=self.trace, givens=givens.values()) - assert isinstance(value, np.ndarray) - self.evaluated[param_idx] = drawn[(param, samples)] = value - givens[param.name] = (param, value) - except theano.graph.fg.MissingInputError: - missing_inputs.add(param_idx) - return [self.evaluated[j] for j in params] - - def init(self) -> None: - """This method carries out the initialization phase of sampling - from the posterior predictive distribution. Notably it initializes the - ``_DrawValuesContext`` bookkeeping object and evaluates the "fast drawable" - parts of the model.""" - vars: list[Any] = self.vars - trace: _TraceDict = self.trace - samples: int = self.samples - leaf_nodes: dict[str, Any] - named_nodes_parents: dict[str, Any] - named_nodes_children: dict[str, Any] - - # initialization phase - context = _DrawValuesContext.get_context() - assert isinstance(context, _DrawValuesContext) - with context: - drawn = context.drawn_vars - evaluated: dict[int, Any] = {} - symbolic_params = [] - for i, var in enumerate(vars): - if is_fast_drawable(var): - evaluated[i] = self.draw_value(var) - continue - name = getattr(var, "name", None) - if (var, samples) in drawn: - evaluated[i] = drawn[(var, samples)] - # We filter out Deterministics by checking for `model` attribute - elif name is not None and hasattr(var, "model") and name in trace.varnames: - # param.name is in the trace. Record it as drawn and evaluated - drawn[(var, samples)] = evaluated[i] = trace[cast(str, name)] - else: - # param still needs to be drawn - symbolic_params.append((i, var)) - self.evaluated = evaluated - self.symbolic_params = symbolic_params - - def make_graph(self) -> None: - # Distribution parameters may be nodes which have named node-inputs - # specified in the point. Need to find the node-inputs, their - # parents and children to replace them. 
- symbolic_params = self.symbolic_params - self.leaf_nodes = {} - self.named_nodes_parents = {} - self.named_nodes_children = {} - for _, param in symbolic_params: - if hasattr(param, "name"): - # Get the named nodes under the `param` node - nn, nnp, nnc = get_named_nodes_and_relations(param) - self.leaf_nodes.update(nn) - # Update the discovered parental relationships - for k in nnp.keys(): - if k not in self.named_nodes_parents.keys(): - self.named_nodes_parents[k] = nnp[k] - else: - self.named_nodes_parents[k].update(nnp[k]) - # Update the discovered child relationships - for k in nnc.keys(): - if k not in self.named_nodes_children.keys(): - self.named_nodes_children[k] = nnc[k] - else: - self.named_nodes_children[k].update(nnc[k]) - - def draw_value(self, param, trace: _TraceDict | None = None, givens=None): - """Draw a set of random values from a distribution or return a constant. - - Parameters - ---------- - param: number, array like, theano variable or pymc3 random variable - The value or distribution. Constants or shared variables - will be converted to an array and returned. Theano variables - are evaluated. If `param` is a pymc3 random variable, draw - values from it and return that (as ``np.ndarray``), unless a - value is specified in the ``trace``. - trace: pm.MultiTrace, optional - A dictionary from pymc3 variable names to samples of their values - used to provide context for evaluating ``param``. - givens: dict, optional - A dictionary from theano variables to their values. These values - are used to evaluate ``param`` if it is a theano variable. - """ - samples = self.samples - - def random_sample( - meth: Callable[..., np.ndarray], - param, - point: _TraceDict, - size: int, - shape: tuple[int, ...], - ) -> np.ndarray: - val = meth(point=point, size=size) - try: - assert val.shape == (size,) + shape, ( - "Sampling from random of %s yields wrong shape" % param - ) - # error-quashing here is *extremely* ugly, but it seems to be what the logic in DensityDist wants. - except AssertionError as e: - if ( - hasattr(param, "distribution") - and hasattr(param.distribution, "wrap_random_with_dist_shape") - and not param.distribution.wrap_random_with_dist_shape - ): - pass - else: - raise e - - return val - - if isinstance(param, (numbers.Number, np.ndarray)): - return param - elif isinstance(param, theano_constant): - return param.value - elif isinstance(param, tt.sharedvar.SharedVariable): - return param.get_value() - elif isinstance(param, (tt.TensorVariable, MultiObservedRV)): - if hasattr(param, "model") and trace and param.name in trace.varnames: - return trace[param.name] - elif hasattr(param, "random") and param.random is not None: - model = modelcontext(None) - assert isinstance(model, Model) - shape: tuple[int, ...] = tuple(_param_shape(param, model)) - return random_sample(param.random, param, point=trace, size=samples, shape=shape) - elif ( - hasattr(param, "distribution") - and hasattr(param.distribution, "random") - and param.distribution.random is not None - ): - if hasattr(param, "observations"): - # shape inspection for ObservedRV - dist_tmp = param.distribution - try: - distshape: tuple[int, ...] 
= tuple(param.observations.shape.eval()) - except AttributeError: - distshape = tuple(param.observations.shape) - - dist_tmp.shape = distshape - try: - return random_sample( - dist_tmp.random, - param, - point=trace, - size=samples, - shape=distshape, - ) - except (ValueError, TypeError): - # reset shape to account for shape changes - # with theano.shared inputs - dist_tmp.shape = () - # We want to draw values to infer the dist_shape, - # we don't want to store these drawn values to the context - with _DrawValuesContextBlocker(): - point = trace[0] if trace else None - temp_val = np.atleast_1d(dist_tmp.random(point=point, size=None)) - # if hasattr(param, 'name') and param.name == 'obs': - # import pdb; pdb.set_trace() - # Sometimes point may change the size of val but not the - # distribution's shape - if point and samples is not None: - temp_size = np.atleast_1d(samples) - if all(temp_val.shape[: len(temp_size)] == temp_size): - dist_tmp.shape = tuple(temp_val.shape[len(temp_size) :]) - else: - dist_tmp.shape = tuple(temp_val.shape) - # I am not sure why I need to do this, but I do in order to trim off a - # degenerate dimension [2019/09/05:rpg] - if dist_tmp.shape[0] == 1 and len(dist_tmp.shape) > 1: - dist_tmp.shape = dist_tmp.shape[1:] - return random_sample( - dist_tmp.random, - point=trace, - size=samples, - param=param, - shape=tuple(dist_tmp.shape), - ) - else: # has a distribution, but no observations - distshape = tuple(param.distribution.shape) - return random_sample( - meth=param.distribution.random, - param=param, - point=trace, - size=samples, - shape=distshape, - ) - # NOTE: I think the following is already vectorized. - else: - if givens: - variables, values = list(zip(*givens)) - else: - variables = values = [] - # We only truly care if the ancestors of param that were given - # value have the matching dshape and val.shape - param_ancestors = set( - theano.graph.basic.ancestors([param], blockers=list(variables)) - ) - inputs = [ - (var, val) for var, val in zip(variables, values) if var in param_ancestors - ] - if inputs: - input_vars, input_vals = list(zip(*inputs)) - else: - input_vars = [] - input_vals = [] - func = _compile_theano_function(param, input_vars) - if not input_vars: - assert input_vals == [] # AFAICT if there are now vars, there can't be vals - output = func(*input_vals) - if hasattr(output, "shape"): - val = np.repeat(np.expand_dims(output, 0), samples, axis=0) - else: - val = np.full(samples, output) - - else: - val = func(*input_vals) - # np.ndarray([func(*input_vals) for inp in zip(*input_vals)]) - return val - raise ValueError("Unexpected type in draw_value: %s" % type(param)) - - -def _param_shape(var_desig, model: Model) -> tuple[int, ...]: - if isinstance(var_desig, str): - v = model[var_desig] - else: - v = var_desig - if hasattr(v, "observations"): - try: - # To get shape of _observed_ data container `pm.Data` - # (wrapper for theano.SharedVariable) we evaluate it. 
- shape = tuple(v.observations.shape.eval()) - except AttributeError: - shape = v.observations.shape - elif hasattr(v, "dshape"): - shape = v.dshape - else: - shape = v.tag.test_value.shape - if shape == (1,): - shape = tuple() - return shape diff --git a/pymc3/distributions/simulator.py b/pymc3/distributions/simulator.py index 1277ec4c82c..8b5951b1ad6 100644 --- a/pymc3/distributions/simulator.py +++ b/pymc3/distributions/simulator.py @@ -18,7 +18,7 @@ from scipy.spatial import cKDTree -from pymc3.distributions.distribution import NoDistribution, draw_values, to_tuple +from pymc3.distributions.distribution import NoDistribution __all__ = ["Simulator"] @@ -114,12 +114,12 @@ def random(self, point=None, size=None): ------- array """ - size = to_tuple(size) - params = draw_values([*self.params], point=point, size=size) - if len(size) == 0: - return self.function(*params) - else: - return np.array([self.function(*params) for _ in range(size[0])]) + # size = to_tuple(size) + # params = draw_values([*self.params], point=point, size=size) + # if len(size) == 0: + # return self.function(*params) + # else: + # return np.array([self.function(*params) for _ in range(size[0])]) def _str_repr(self, name=None, dist=None, formatting="plain"): if dist is None: diff --git a/pymc3/distributions/timeseries.py b/pymc3/distributions/timeseries.py index e3e1aa15bc4..7357d240b0e 100644 --- a/pymc3/distributions/timeseries.py +++ b/pymc3/distributions/timeseries.py @@ -109,7 +109,7 @@ class AR(distribution.Continuous): """ def __init__( - self, rho, sigma=None, tau=None, constant=False, init=Flat.dist(), sd=None, *args, **kwargs + self, rho, sigma=None, tau=None, constant=False, init=None, sd=None, *args, **kwargs ): super().__init__(*args, **kwargs) if sd is not None: @@ -141,7 +141,7 @@ def __init__( self.constant = constant self.rho = rho = tt.as_tensor_variable(rho) - self.init = init + self.init = init or Flat.dist() def logp(self, value): """ @@ -201,7 +201,7 @@ class GaussianRandomWalk(distribution.Continuous): distribution for initial value (Defaults to Flat()) """ - def __init__(self, tau=None, init=Flat.dist(), sigma=None, mu=0.0, sd=None, *args, **kwargs): + def __init__(self, tau=None, init=None, sigma=None, mu=0.0, sd=None, *args, **kwargs): kwargs.setdefault("shape", 1) super().__init__(*args, **kwargs) if sum(self.shape) == 0: @@ -213,7 +213,7 @@ def __init__(self, tau=None, init=Flat.dist(), sigma=None, mu=0.0, sd=None, *arg sigma = tt.as_tensor_variable(sigma) self.sigma = self.sd = sigma self.mu = tt.as_tensor_variable(mu) - self.init = init + self.init = init or Flat.dist() self.mean = tt.as_tensor_variable(0.0) def _mu_and_sigma(self, mu, sigma): @@ -261,15 +261,16 @@ def random(self, point=None, size=None): ------- array """ - sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size) - return distribution.generate_samples( - self._random, - sigma=sigma, - mu=mu, - size=size, - dist_shape=self.shape, - not_broadcast_kwargs={"sample_shape": to_tuple(size)}, - ) + # sigma, mu = distribution.draw_values([self.sigma, self.mu], point=point, size=size) + # return distribution.generate_samples( + # self._random, + # sigma=sigma, + # mu=mu, + # size=size, + # dist_shape=self.shape, + # not_broadcast_kwargs={"sample_shape": to_tuple(size)}, + # ) + pass def _random(self, sigma, mu, size, sample_shape): """Implement a Gaussian random walk as a cumulative sum of normals. 
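The ``_random`` helper retained above implements a Gaussian random walk as a cumulative sum of independent normal increments; the commented-out ``random`` method used to dispatch to it through ``generate_samples``. A minimal NumPy sketch of that cumulative-sum sampling scheme; the function name and signature below are illustrative, not the pymc3 API:

.. code:: python

    import numpy as np

    def sample_gaussian_random_walk(mu, sigma, steps, size=1, rng=None):
        # Illustrative sketch only: draw `size` walks of length `steps` by
        # cumulatively summing independent Normal(mu, sigma) increments.
        rng = np.random.default_rng() if rng is None else rng
        increments = rng.normal(loc=mu, scale=sigma, size=(size, steps))
        return np.cumsum(increments, axis=1)

    paths = sample_gaussian_random_walk(mu=0.0, sigma=1.0, steps=100, size=5)
    assert paths.shape == (5, 100)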
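Relatedly, the ``init=Flat.dist()`` keyword defaults in ``AR``, ``GaussianRandomWalk``, and ``MvGaussianRandomWalk`` are replaced by ``init=None`` with the fallback constructed inside ``__init__``, so the default distribution is no longer built once at class-definition time and shared across every instance. A self-contained sketch of that deferral pattern; the class names below are placeholders, not pymc3 code:

.. code:: python

    class _FlatPlaceholder:
        """Stands in for ``Flat.dist()`` so the sketch runs without pymc3."""

    class RandomWalkSketch:
        def __init__(self, init=None):
            # Mirror the `init or Flat.dist()` fallback used in the patch:
            # the default is only constructed when no `init` is passed,
            # instead of being evaluated in the signature at import time.
            self.init = init or _FlatPlaceholder()

    a = RandomWalkSketch()
    b = RandomWalkSketch()
    assert a.init is not b.init  # each instance gets its own default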
@@ -430,11 +431,11 @@ class MvGaussianRandomWalk(distribution.Continuous): """ def __init__( - self, mu=0.0, cov=None, tau=None, chol=None, lower=True, init=Flat.dist(), *args, **kwargs + self, mu=0.0, cov=None, tau=None, chol=None, lower=True, init=None, *args, **kwargs ): super().__init__(*args, **kwargs) - self.init = init + self.init = init or Flat.dist() self.innovArgs = (mu, cov, tau, chol, lower) self.innov = multivariate.MvNormal.dist(*self.innovArgs, shape=self.shape) self.mean = tt.as_tensor_variable(0.0) diff --git a/pymc3/distributions/transforms.py b/pymc3/distributions/transforms.py index 146bdbcc673..ed4a669d170 100644 --- a/pymc3/distributions/transforms.py +++ b/pymc3/distributions/transforms.py @@ -19,7 +19,6 @@ from pymc3.distributions import distribution from pymc3.math import invlogit, logit, logsumexp -from pymc3.model import FreeRV from pymc3.theanof import floatX, gradient __all__ = [ @@ -126,7 +125,8 @@ def __init__(self, dist, transform, *args, **kwargs): self.dist = dist self.transform_used = transform - v = forward(FreeRV(name="v", distribution=dist)) + # XXX: `FreeRV` no longer exists + v = None # forward(FreeRV(name="v", distribution=dist)) self.type = v.type super().__init__(v.shape.tag.test_value, v.dtype, testval, dist.defaults, *args, **kwargs) diff --git a/pymc3/gp/gp.py b/pymc3/gp/gp.py index 654bf536cfa..335169c359f 100644 --- a/pymc3/gp/gp.py +++ b/pymc3/gp/gp.py @@ -22,7 +22,6 @@ import pymc3 as pm -from pymc3.distributions import draw_values from pymc3.gp.cov import Constant, Covariance from pymc3.gp.mean import Zero from pymc3.gp.util import ( @@ -554,7 +553,8 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False, given=None): given = {} mu, cov = self.predictt(Xnew, diag, pred_noise, given) - return draw_values([mu, cov], point=point) + # XXX: This needs to be refactored + # return draw_values([mu, cov], point=point) def predictt(self, Xnew, diag=False, pred_noise=False, given=None): R""" @@ -1193,7 +1193,8 @@ def predict(self, Xnew, point=None, diag=False, pred_noise=False): Default is `False`. 
""" mu, cov = self._build_conditional(Xnew, pred_noise, diag) - return draw_values([mu, cov], point=point) + # XXX: This needs to be refactored + # return draw_values([mu, cov], point=point) def predictt(self, Xnew, diag=False, pred_noise=False): R""" diff --git a/pymc3/model.py b/pymc3/model.py index 3a066e14ead..010e37f36d3 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -29,19 +29,19 @@ from pandas import Series from theano.compile import SharedVariable -from theano.graph.basic import Apply, Variable +from theano.graph.basic import Variable from theano.tensor.random.op import Observed, observed from theano.tensor.var import TensorVariable import pymc3 as pm from pymc3.blocking import DictToArrayBijection, RaveledVars -from pymc3.distributions import _get_scaling, change_rv_size, logpt, logpt_sum +from pymc3.distributions import change_rv_size, logpt, logpt_sum from pymc3.exceptions import ImputationWarning from pymc3.math import flatten_list from pymc3.memoize import WithMemoization from pymc3.theanof import generator, gradient, hessian, inputvars -from pymc3.util import get_transformed_name, get_var_name +from pymc3.util import get_var_name from pymc3.vartypes import continuous_types, discrete_types, isgenerator, typefilter __all__ = [ @@ -57,41 +57,7 @@ "set_data", ] -FlatView = collections.namedtuple("FlatView", "input, replacements, view") - - -class PyMC3Variable(TensorVariable): - """Class to wrap Theano TensorVariable for custom behavior.""" - - # Implement matrix multiplication infix operator: X @ w - __matmul__ = tt.dot - - def __rmatmul__(self, other): - return tt.dot(other, self) - - def _str_repr(self, name=None, dist=None, formatting="plain"): - if getattr(self, "distribution", None) is None: - if "latex" in formatting: - return None - else: - return super().__str__() - - if name is None and hasattr(self, "name"): - name = self.name - if dist is None and hasattr(self, "distribution"): - dist = self.distribution - return self.distribution._str_repr(name=name, dist=dist, formatting=formatting) - - def _repr_latex_(self, *, formatting="latex_with_params", **kwargs): - return self._str_repr(formatting=formatting, **kwargs) - - def __str__(self, **kwargs): - try: - return self._str_repr(formatting="plain", **kwargs) - except: - return super().__str__() - - __latex__ = _repr_latex_ +FlatView = collections.namedtuple("FlatView", "input, replacements") class InstanceMethod: @@ -1089,7 +1055,7 @@ def register_rv(self, rv_var, name, data=None, total_size=None, dims=None): Returns ------- - FreeRV or ObservedRV + TensorVariable """ name = self.name_for(name) rv_var.name = name @@ -1292,11 +1258,6 @@ def profile(self, outs, n=1000, point=None, profile=True, *args, **kwargs): def flatten(self, vars=None, order=None, inputvar=None): """Flattens model's input and returns: - FlatView with - * input vector variable - * replacements ``input_var -> vars`` - * view `{variable: VarMap}` - Parameters ---------- vars: list of variables or None @@ -1333,8 +1294,7 @@ def flatten(self, vars=None, order=None, inputvar=None): ) last_idx += arr_len - view = {vm.var: vm for vm in order.vmap} - flat_view = FlatView(inputvar, replacements, view) + flat_view = FlatView(inputvar, replacements) return flat_view @@ -1384,7 +1344,7 @@ def _str_repr(self, formatting="plain", **kwargs): else: rv_reprs = [rv.__str__() for rv in all_rv] rv_reprs = [ - rv_repr for rv_repr in rv_reprs if not "TransformedDistribution()" in rv_repr + rv_repr for rv_repr in rv_reprs if "TransformedDistribution()" not in rv_repr ] # 
align vars on their ~ names = [s[: s.index("~") - 1] for s in rv_reprs] @@ -1543,68 +1503,6 @@ def __call__(self, *args, **kwargs): compilef = fastfn -class FreeRV(Factor, PyMC3Variable): - """Unobserved random variable that a model is specified in terms of.""" - - dshape = None # type: Tuple[int, ...] - size = None # type: int - distribution = None # type: Optional[Distribution] - model = None # type: Optional[Model] - - def __init__( - self, - type=None, - owner=None, - index=None, - name=None, - distribution=None, - total_size=None, - model=None, - ): - """ - Parameters - ---------- - type: theano type (optional) - owner: theano owner (optional) - name: str - distribution: Distribution - model: Model - total_size: scalar Tensor (optional) - needed for upscaling logp - """ - if type is None: - type = distribution.type - super().__init__(type, owner, index, name) - - if distribution is not None: - self.dshape = tuple(distribution.shape) - self.dsize = int(np.prod(distribution.shape)) - self.distribution = distribution - self.tag.test_value = ( - np.ones(distribution.shape, distribution.dtype) * distribution.default() - ) - self.logp_elemwiset = distribution.logp(self) - # The logp might need scaling in minibatches. - # This is done in `Factor`. - self.logp_sum_unscaledt = distribution.logp_sum(self) - self.logp_nojac_unscaledt = distribution.logp_nojac(self) - self.total_size = total_size - self.model = model - self.scaling = _get_scaling(total_size, self.shape, self.ndim) - - incorporate_methods( - source=distribution, - destination=self, - methods=["random"], - wrapper=InstanceMethod, - ) - - @property - def init_value(self): - """Convenience attribute to return tag.test_value""" - return self.tag.test_value - - def pandas_to_array(data): """Convert a pandas object to a NumPy array. @@ -1728,120 +1626,6 @@ def make_obs_var( return rv_obs -class ObservedRV(Factor, PyMC3Variable): - """Observed random variable that a model is specified in terms of. - Potentially partially observed. - """ - - def __init__( - self, - type=None, - owner=None, - index=None, - name=None, - data=None, - distribution=None, - total_size=None, - model=None, - ): - """ - Parameters - ---------- - type: theano type (optional) - owner: theano owner (optional) - name: str - distribution: Distribution - model: Model - total_size: scalar Tensor (optional) - needed for upscaling logp - """ - - if hasattr(data, "type") and isinstance(data.type, tt.TensorType): - type = data.type - - if type is None: - data = pandas_to_array(data) - if isinstance(data, theano.graph.basic.Variable): - type = data.type - else: - type = tt.TensorType(distribution.dtype, [s == 1 for s in data.shape]) - - self.observations = data - - super().__init__(type, owner, index, name) - - if distribution is not None: - data = as_tensor(data, name, model, distribution) - - self.missing_values = data.missing_values - self.logp_elemwiset = distribution.logp(data) - # The logp might need scaling in minibatches. - # This is done in `Factor`. 
- self.logp_sum_unscaledt = distribution.logp_sum(data) - self.logp_nojac_unscaledt = distribution.logp_nojac(data) - self.total_size = total_size - self.model = model - self.distribution = distribution - - # make this RV a view on the combined missing/nonmissing array - Apply(theano.compile.view_op, inputs=[data], outputs=[self]) - self.tag.test_value = theano.compile.view_op(data).tag.test_value.astype(self.dtype) - self.scaling = _get_scaling(total_size, data.shape, data.ndim) - - @property - def init_value(self): - """Convenience attribute to return tag.test_value""" - return self.tag.test_value - - -class MultiObservedRV(Factor): - """Observed random variable that a model is specified in terms of. - Potentially partially observed. - """ - - def __init__(self, name, data, distribution, total_size=None, model=None): - """ - Parameters - ---------- - type: theano type (optional) - owner: theano owner (optional) - name: str - distribution: Distribution - model: Model - total_size: scalar Tensor (optional) - needed for upscaling logp - """ - self.name = name - self.data = { - name: as_tensor(data, name, model, distribution) for name, data in data.items() - } - - self.missing_values = [ - datum.missing_values for datum in self.data.values() if datum.missing_values is not None - ] - self.logp_elemwiset = distribution.logp(**self.data) - # The logp might need scaling in minibatches. - # This is done in `Factor`. - self.logp_sum_unscaledt = distribution.logp_sum(**self.data) - self.logp_nojac_unscaledt = distribution.logp_nojac(**self.data) - self.total_size = total_size - self.model = model - self.distribution = distribution - self.scaling = _get_scaling(total_size, self.logp_elemwiset.shape, self.logp_elemwiset.ndim) - - # Make hashable by id for draw_values - def __hash__(self): - return id(self) - - def __eq__(self, other): - "Use object identity for MultiObservedRV equality." - # This is likely a Bad Thing, but changing it would break a lot of code. 
- return self is other - - def __ne__(self, other): - return not self == other - - def _walk_up_rv(rv, formatting="plain"): """Walk up theano graph to get inputs for deterministic RV.""" all_rvs = [] @@ -1921,67 +1705,6 @@ def Potential(name, var, model=None): return var -class TransformedRV(PyMC3Variable): - """ - Parameters - ---------- - - type: theano type (optional) - owner: theano owner (optional) - name: str - distribution: Distribution - model: Model - total_size: scalar Tensor (optional) - needed for upscaling logp - """ - - def __init__( - self, - type=None, - owner=None, - index=None, - name=None, - distribution=None, - model=None, - transform=None, - total_size=None, - ): - if type is None: - type = distribution.type - super().__init__(type, owner, index, name) - - self.transformation = transform - - if distribution is not None: - self.model = model - self.distribution = distribution - self.dshape = tuple(distribution.shape) - self.dsize = int(np.prod(distribution.shape)) - - transformed_name = get_transformed_name(name, transform) - - self.transformed = model.Var( - transformed_name, transform.apply(distribution), total_size=total_size - ) - - normalRV = transform.backward(self.transformed) - - Apply(theano.compile.view_op, inputs=[normalRV], outputs=[self]) - self.tag.test_value = normalRV.tag.test_value - self.scaling = _get_scaling(total_size, self.shape, self.ndim) - incorporate_methods( - source=distribution, - destination=self, - methods=["random"], - wrapper=InstanceMethod, - ) - - @property - def init_value(self): - """Convenience attribute to return tag.test_value""" - return self.tag.test_value - - def as_iterargs(data): if isinstance(data, tuple): return data @@ -1990,7 +1713,7 @@ def as_iterargs(data): def all_continuous(vars): - """Check that vars not include discrete variables or BART variables, excepting ObservedRVs.""" + """Check that vars not include discrete variables or BART variables, excepting observed RVs.""" vars_ = [var for var in vars if not (var.owner and isinstance(var.owner.op, Observed))] if any( diff --git a/pymc3/model_graph.py b/pymc3/model_graph.py index cd3feb30709..3a999f5e372 100644 --- a/pymc3/model_graph.py +++ b/pymc3/model_graph.py @@ -15,17 +15,17 @@ from collections import deque from typing import Dict, Iterator, Optional, Set -VarName = str - from theano.compile import SharedVariable from theano.graph.basic import walk from theano.tensor import Tensor +from theano.tensor.random.op import Observed import pymc3 as pm -from pymc3.model import ObservedRV from pymc3.util import get_default_varnames, get_var_name +VarName = str + class ModelGraph: def __init__(self, model): @@ -112,7 +112,7 @@ def update_input_map(key: str, val: Set[VarName]): for var_name in self.var_names: var = self.model[var_name] update_input_map(var_name, self.get_parents(var)) - if isinstance(var, ObservedRV): + if var.owner and isinstance(var.owner.op, Observed): try: obs_name = var.observations.name if obs_name: @@ -128,7 +128,7 @@ def _make_node(self, var_name, graph, *, formatting: str = "plain"): # styling for node attrs = {} - if isinstance(v, pm.model.ObservedRV): + if v.owner and isinstance(v.owner.op, Observed): attrs["style"] = "filled" # make Data be roundtangle, instead of rectangle @@ -171,8 +171,9 @@ def get_plates(self): shape = tuple(v.observations.shape.eval()) except AttributeError: shape = v.observations.shape - elif hasattr(v, "dshape"): - shape = v.dshape + # XXX: This needs to be refactored + # elif hasattr(v, "dshape"): + # shape = v.dshape 
else: shape = v.tag.test_value.shape if shape == (1,): diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 5ce93d95643..e69b527faa0 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -41,8 +41,6 @@ from pymc3.backends.ndarray import NDArray from pymc3.blocking import DictToArrayBijection from pymc3.distributions import change_rv_size, rv_ancestors, strip_observed -from pymc3.distributions.distribution import draw_values -from pymc3.distributions.posterior_predictive import fast_sample_posterior_predictive from pymc3.exceptions import IncorrectArgumentsError, SamplingError from pymc3.model import Model, Point, all_continuous, modelcontext from pymc3.parallel_sampling import Draw, _cpu_count @@ -81,7 +79,6 @@ "sample_posterior_predictive_w", "init_nuts", "sample_prior_predictive", - "fast_sample_posterior_predictive", ] STEP_METHODS = ( @@ -1916,7 +1913,9 @@ def sample_posterior_predictive_w( var = variables[idx] # TODO sample_posterior_predictive_w is currently only work for model with # one observed. - ppc[var.name].append(draw_values([var], point=param, size=size[idx])[0]) + # XXX: This needs to be refactored + # ppc[var.name].append(draw_values([var], point=param, size=size[idx])[0]) + raise NotImplementedError() except KeyboardInterrupt: pass diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py index aeb04c7ee0d..7db687e1448 100644 --- a/pymc3/step_methods/arraystep.py +++ b/pymc3/step_methods/arraystep.py @@ -18,9 +18,10 @@ import numpy as np from numpy.random import uniform +from theano.graph.basic import Variable from pymc3.blocking import DictToArrayBijection, RaveledVars -from pymc3.model import PyMC3Variable, modelcontext +from pymc3.model import modelcontext from pymc3.step_methods.compound import CompoundStep from pymc3.util import get_var_name @@ -47,7 +48,7 @@ class BlockedStep: generates_stats = False stats_dtypes: List[Dict[str, np.dtype]] = [] - vars: List[PyMC3Variable] = [] + vars: List[Variable] = [] def __new__(cls, *args, **kwargs): blocked = kwargs.get("blocked") diff --git a/pymc3/step_methods/elliptical_slice.py b/pymc3/step_methods/elliptical_slice.py index f1c1bb40d33..1e98234725b 100644 --- a/pymc3/step_methods/elliptical_slice.py +++ b/pymc3/step_methods/elliptical_slice.py @@ -16,7 +16,6 @@ import numpy.random as nr import theano.tensor as tt -from pymc3.distributions import draw_values from pymc3.model import modelcontext from pymc3.step_methods.arraystep import ArrayStep, Competence from pymc3.theanof import inputvars @@ -101,7 +100,8 @@ def astep(self, q0, logp): # Draw from the normal prior by multiplying the Cholesky decomposition # of the covariance with draws from a standard normal - chol = draw_values([self.prior_chol])[0] + # XXX: This needs to be refactored + chol = None # draw_values([self.prior_chol])[0] nu = np.dot(chol, nr.randn(chol.shape[0])) y = logp(q0) - nr.standard_exponential() diff --git a/pymc3/step_methods/gibbs.py b/pymc3/step_methods/gibbs.py index 850b9ac5f56..ec5ef8a44bc 100644 --- a/pymc3/step_methods/gibbs.py +++ b/pymc3/step_methods/gibbs.py @@ -19,17 +19,7 @@ """ from warnings import warn -from numpy import ( - arange, - array, - cumsum, - empty, - exp, - max, - nested_iters, - ones, - searchsorted, -) +from numpy import arange, array, cumsum, empty, exp, max, nested_iters, searchsorted from numpy.random import uniform from theano.graph.basic import graph_inputs from theano.tensor import add @@ -61,7 +51,8 @@ def __init__(self, vars, values=None, model=None): ) model = modelcontext(model) 
self.var = vars[0] - self.sh = ones(self.var.dshape, self.var.dtype) + # XXX: This needs to be refactored + self.sh = None # ones(self.var.dshape, self.var.dtype) if values is None: self.values = arange(self.var.distribution.k) else: @@ -71,7 +62,9 @@ def __init__(self, vars, values=None, model=None): def astep(self, q, logp): p = array([logp(v * self.sh) for v in self.values]) - return categorical(p, self.var.dshape) + # XXX: This needs to be refactored + shape = None # self.var.dshape + return categorical(p, shape) @staticmethod def competence(var, has_grad): diff --git a/pymc3/step_methods/hmc/quadpotential.py b/pymc3/step_methods/hmc/quadpotential.py index d1c61ac4595..2d104df3158 100644 --- a/pymc3/step_methods/hmc/quadpotential.py +++ b/pymc3/step_methods/hmc/quadpotential.py @@ -115,13 +115,13 @@ def update(self, sample, grad, tune): """ pass - def raise_ok(self, vmap=None): + def raise_ok(self, map_info=None): """Check if the mass matrix is ok, and raise ValueError if not. Parameters ---------- - vmap: list of blocking.VarMap - List of `VarMap`s, which are namedtuples with var, slc, shp, dtyp + map_info: List of (name, shape, dtype) + List tuples with variable name, shape, and dtype. Raises ------ @@ -245,8 +245,8 @@ def raise_ok(self, map_info): Parameters ---------- - vmap: List of tuples (var, ) - List of `VarMap`s, which are namedtuples with var, slc, shp, dtyp + map_info: List of (name, shape, dtype) + List tuples with variable name, shape, and dtype. Raises ------ diff --git a/pymc3/step_methods/metropolis.py b/pymc3/step_methods/metropolis.py index 314e2783384..99ea5d4ebe5 100644 --- a/pymc3/step_methods/metropolis.py +++ b/pymc3/step_methods/metropolis.py @@ -23,7 +23,6 @@ import pymc3 as pm from pymc3.blocking import DictToArrayBijection -from pymc3.distributions import draw_values from pymc3.step_methods.arraystep import ( ArrayStep, ArrayStepShared, @@ -156,7 +155,8 @@ def __init__( vars = pm.inputvars(vars) if S is None: - S = np.ones(sum(v.dsize for v in vars)) + # XXX: This needs to be refactored + S = None # np.ones(sum(v.dsize for v in vars)) if proposal_dist is not None: self.proposal_dist = proposal_dist(S) @@ -175,7 +175,8 @@ def __init__( # Determine type of variables self.discrete = np.concatenate( - [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars] + # XXX: This needs to be refactored + None # [[v.dtype in pm.discrete_types] * (v.dsize or 1) for v in vars] ) self.any_discrete = self.discrete.any() self.all_discrete = self.discrete.all() @@ -386,7 +387,8 @@ def __init__(self, vars, order="random", transit_p=0.8, model=None): # transition probabilities self.transit_p = transit_p - self.dim = sum(v.dsize for v in vars) + # XXX: This needs to be refactored + self.dim = None # sum(v.dsize for v in vars) if order == "random": self.shuffle_dims = True @@ -465,7 +467,8 @@ def __init__(self, vars, proposal="uniform", order="random", model=None): distr = getattr(v.owner, "op", None) if isinstance(distr, CategoricalRV): - k = draw_values([distr.k])[0] + # XXX: This needs to be refactored + k = None # draw_values([distr.k])[0] elif isinstance(distr, pm.Bernoulli) or (v.dtype in pm.bool_types): k = 2 else: @@ -473,7 +476,8 @@ def __init__(self, vars, proposal="uniform", order="random", model=None): "All variables must be categorical or binary" + "for CategoricalGibbsMetropolis" ) start = len(dimcats) - dimcats += [(dim, k) for dim in range(start, start + v.dsize)] + # XXX: This needs to be refactored + dimcats += None # [(dim, k) for dim in range(start, start 
+ v.dsize)] if order == "random": self.shuffle_dims = True diff --git a/pymc3/step_methods/sgmcmc.py b/pymc3/step_methods/sgmcmc.py index 1620f21b0e8..f8cba8e2a62 100644 --- a/pymc3/step_methods/sgmcmc.py +++ b/pymc3/step_methods/sgmcmc.py @@ -96,9 +96,9 @@ class BaseStochasticGradient(ArrayStepShared): random_seed: int The seed to initialize the Random Stream minibatches: iterator - If the ObservedRV.observed is not a GeneratorOp then this parameter must not be None + If the observed RV is not a GeneratorOp then this parameter must not be None minibatch_tensor: list of tensors - If the ObservedRV.observed is not a GeneratorOp then this parameter must not be None + If the observed RV is not a GeneratorOp then this parameter must not be None The length of this tensor should be the same as the next(minibatches) Notes @@ -154,16 +154,23 @@ def __init__( shared = make_shared_replacements(vars, model) self.updates = OrderedDict() - self.q_size = int(sum(v.dsize for v in self.vars)) + # XXX: This needs to be refactored + self.q_size = None # int(sum(v.dsize for v in self.vars)) + + # This seems to be the only place that `Model.flatten` is used. + # TODO: Why not _actually_ flatten the variables? + # E.g. `flat_vars = tt.concatenate([var.ravel() for var in vars])` + # or `set_subtensor` the `vars` into a `tt.vector`? flat_view = model.flatten(vars) self.inarray = [flat_view.input] self.dlog_prior = prior_dlogp(vars, model, flat_view) self.dlogp_elemwise = elemwise_dlogL(vars, model, flat_view) - self.q_size = int(sum(v.dsize for v in self.vars)) + # XXX: This needs to be refactored + self.q_size = None # int(sum(v.dsize for v in self.vars)) - if minibatch_tensors != None: + if minibatch_tensors is not None: _check_minibatches(minibatch_tensors, minibatches) self.minibatches = minibatches diff --git a/pymc3/tests/test_data_container.py b/pymc3/tests/test_data_container.py index 966ce47cd6a..a79dfb993d3 100644 --- a/pymc3/tests/test_data_container.py +++ b/pymc3/tests/test_data_container.py @@ -46,28 +46,22 @@ def test_sample(self): prior_trace0 = pm.sample_prior_predictive(1000) trace = pm.sample(1000, init=None, tune=1000, chains=1) pp_trace0 = pm.sample_posterior_predictive(trace, 1000) - pp_trace01 = pm.fast_sample_posterior_predictive(trace, 1000) x_shared.set_value(x_pred) prior_trace1 = pm.sample_prior_predictive(1000) pp_trace1 = pm.sample_posterior_predictive(trace, samples=1000) - pp_trace11 = pm.fast_sample_posterior_predictive(trace, samples=1000) assert prior_trace0["b"].shape == (1000,) assert prior_trace0["obs"].shape == (1000, 100) assert prior_trace1["obs"].shape == (1000, 200) assert pp_trace0["obs"].shape == (1000, 100) - assert pp_trace01["obs"].shape == (1000, 100) np.testing.assert_allclose(x, pp_trace0["obs"].mean(axis=0), atol=1e-1) - np.testing.assert_allclose(x, pp_trace01["obs"].mean(axis=0), atol=1e-1) assert pp_trace1["obs"].shape == (1000, 200) - assert pp_trace11["obs"].shape == (1000, 200) np.testing.assert_allclose(x_pred, pp_trace1["obs"].mean(axis=0), atol=1e-1) - np.testing.assert_allclose(x_pred, pp_trace11["obs"].mean(axis=0), atol=1e-1) def test_sample_posterior_predictive_after_set_data(self): with pm.Model() as model: @@ -81,12 +75,9 @@ def test_sample_posterior_predictive_after_set_data(self): x_test = [5, 6, 9] pm.set_data(new_data={"x": x_test}) y_test = pm.sample_posterior_predictive(trace) - y_test1 = pm.fast_sample_posterior_predictive(trace) assert y_test["obs"].shape == (1000, 3) - assert y_test1["obs"].shape == (1000, 3) 
np.testing.assert_allclose(x_test, y_test["obs"].mean(axis=0), atol=1e-1) - np.testing.assert_allclose(x_test, y_test1["obs"].mean(axis=0), atol=1e-1) def test_sample_after_set_data(self): with pm.Model() as model: @@ -102,12 +93,9 @@ def test_sample_after_set_data(self): pm.set_data(new_data={"x": new_x, "y": new_y}) new_trace = pm.sample(1000, init=None, tune=1000, chains=1) pp_trace = pm.sample_posterior_predictive(new_trace, 1000) - pp_tracef = pm.fast_sample_posterior_predictive(new_trace, 1000) assert pp_trace["obs"].shape == (1000, 3) - assert pp_tracef["obs"].shape == (1000, 3) np.testing.assert_allclose(new_y, pp_trace["obs"].mean(axis=0), atol=1e-1) - np.testing.assert_allclose(new_y, pp_tracef["obs"].mean(axis=0), atol=1e-1) def test_shared_data_as_index(self): """ @@ -129,14 +117,11 @@ def test_shared_data_as_index(self): with model: pm.set_data(new_data={"index": new_index, "y": new_y}) pp_trace = pm.sample_posterior_predictive(trace, 1000, var_names=["alpha", "obs"]) - pp_tracef = pm.fast_sample_posterior_predictive(trace, 1000, var_names=["alpha", "obs"]) assert prior_trace["alpha"].shape == (1000, 3) assert trace["alpha"].shape == (1000, 3) assert pp_trace["alpha"].shape == (1000, 3) assert pp_trace["obs"].shape == (1000, 3) - assert pp_tracef["alpha"].shape == (1000, 3) - assert pp_tracef["obs"].shape == (1000, 3) def test_shared_data_as_rv_input(self): """ diff --git a/pymc3/tests/test_distributions_random.py b/pymc3/tests/test_distributions_random.py index a56f3f3b7b2..ef9946ff841 100644 --- a/pymc3/tests/test_distributions_random.py +++ b/pymc3/tests/test_distributions_random.py @@ -19,7 +19,6 @@ import numpy as np import numpy.random as nr -import numpy.testing as npt import pytest import scipy.stats as st import theano @@ -30,12 +29,7 @@ import pymc3 as pm from pymc3.distributions.dist_math import clipped_beta_rvs -from pymc3.distributions.distribution import ( - _DrawValuesContext, - _DrawValuesContextBlocker, - draw_values, - to_tuple, -) +from pymc3.distributions.distribution import to_tuple from pymc3.exceptions import ShapeError from pymc3.tests.helpers import SeededTest from pymc3.tests.test_distributions import ( @@ -120,90 +114,6 @@ def pymc3_random_discrete( assert p > alpha, str(pt) -class TestDrawValues(SeededTest): - def test_draw_scalar_parameters(self): - with pm.Model(): - y = pm.Normal("y1", mu=0.0, sigma=1.0) - mu, tau = draw_values([y.distribution.mu, y.distribution.tau]) - npt.assert_almost_equal(mu, 0) - npt.assert_almost_equal(tau, 1) - - def test_draw_dependencies(self): - with pm.Model(): - x = pm.Normal("x", mu=0.0, sigma=1.0) - exp_x = pm.Deterministic("exp_x", pm.math.exp(x)) - - x, exp_x = draw_values([x, exp_x]) - npt.assert_almost_equal(np.exp(x), exp_x) - - def test_draw_order(self): - with pm.Model(): - x = pm.Normal("x", mu=0.0, sigma=1.0) - exp_x = pm.Deterministic("exp_x", pm.math.exp(x)) - - # Need to draw x before drawing log_x - exp_x, x = draw_values([exp_x, x]) - npt.assert_almost_equal(np.exp(x), exp_x) - - def test_draw_point_replacement(self): - with pm.Model(): - mu = pm.Normal("mu", mu=0.0, tau=1e-3) - sigma = pm.Gamma("sigma", alpha=1.0, beta=1.0, transform=None) - y = pm.Normal("y", mu=mu, sigma=sigma) - mu2, tau2 = draw_values( - [y.distribution.mu, y.distribution.tau], point={"mu": 5.0, "sigma": 2.0} - ) - npt.assert_almost_equal(mu2, 5) - npt.assert_almost_equal(tau2, 1 / 2.0 ** 2) - - def test_random_sample_returns_nd_array(self): - with pm.Model(): - mu = pm.Normal("mu", mu=0.0, tau=1e-3) - sigma = pm.Gamma("sigma", 
alpha=1.0, beta=1.0, transform=None) - y = pm.Normal("y", mu=mu, sigma=sigma) - mu, tau = draw_values([y.distribution.mu, y.distribution.tau]) - assert isinstance(mu, np.ndarray) - assert isinstance(tau, np.ndarray) - - -class TestDrawValuesContext: - def test_normal_context(self): - with _DrawValuesContext() as context0: - assert context0.parent is None - context0.drawn_vars["root_test"] = 1 - with _DrawValuesContext() as context1: - assert id(context1.drawn_vars) == id(context0.drawn_vars) - assert context1.parent == context0 - with _DrawValuesContext() as context2: - assert id(context2.drawn_vars) == id(context0.drawn_vars) - assert context2.parent == context1 - context2.drawn_vars["leaf_test"] = 2 - assert context1.drawn_vars["leaf_test"] == 2 - context1.drawn_vars["root_test"] = 3 - assert context0.drawn_vars["root_test"] == 3 - assert context0.drawn_vars["leaf_test"] == 2 - - def test_blocking_context(self): - with _DrawValuesContext() as context0: - assert context0.parent is None - context0.drawn_vars["root_test"] = 1 - with _DrawValuesContext() as context1: - assert id(context1.drawn_vars) == id(context0.drawn_vars) - assert context1.parent == context0 - with _DrawValuesContextBlocker() as blocker: - assert id(blocker.drawn_vars) != id(context0.drawn_vars) - assert blocker.parent is None - blocker.drawn_vars["root_test"] = 2 - with _DrawValuesContext() as context2: - assert id(context2.drawn_vars) == id(blocker.drawn_vars) - assert context2.parent == blocker - context2.drawn_vars["root_test"] = 3 - context2.drawn_vars["leaf_test"] = 4 - assert blocker.drawn_vars["root_test"] == 3 - assert "leaf_test" not in context1.drawn_vars - assert context0.drawn_vars["root_test"] == 1 - - class BaseTestCases: class BaseTestCase(SeededTest): shape = 5 @@ -1228,9 +1138,10 @@ def test_mixture_random_shape(): w3 = pm.Dirichlet("w3", a=np.ones(2), shape=(20, 2)) like3 = pm.Mixture("like3", w=w3, comp_dists=comp3, observed=y) - rand0, rand1, rand2, rand3 = draw_values( - [like0, like1, like2, like3], point=m.test_point, size=100 - ) + # XXX: This needs to be refactored + rand0, rand1, rand2, rand3 = [None] * 4 # draw_values( + # [like0, like1, like2, like3], point=m.test_point, size=100 + # ) assert rand0.shape == (100, 20) assert rand1.shape == (100, 20) assert rand2.shape == (100, 20) @@ -1265,23 +1176,15 @@ def test_mixture_random_shape_fast(): w3 = pm.Dirichlet("w3", a=np.ones(2), shape=(20, 2)) like3 = pm.Mixture("like3", w=w3, comp_dists=comp3, observed=y) - rand0, rand1, rand2, rand3 = draw_values( - [like0, like1, like2, like3], point=m.test_point, size=100 - ) + # XXX: This needs to be refactored + rand0, rand1, rand2, rand3 = [None] * 4 # draw_values( + # [like0, like1, like2, like3], point=m.test_point, size=100 + # ) assert rand0.shape == (100, 20) assert rand1.shape == (100, 20) assert rand2.shape == (100, 20) assert rand3.shape == (100, 20) - # I *think* that the mixture means that this is not going to work, - # but I could be wrong. 
[2019/08/22:rpg] - with m: - ppc = pm.fast_sample_posterior_predictive([m.test_point], samples=200) - assert ppc["like0"].shape == (200, 20) - assert ppc["like1"].shape == (200, 20) - assert ppc["like2"].shape == (200, 20) - assert ppc["like3"].shape == (200, 20) - class TestDensityDist: @pytest.mark.parametrize("shape", [(), (3,), (3, 2)], ids=str) @@ -1303,9 +1206,6 @@ def test_density_dist_with_random_sampleable(self, shape): ppc = pm.sample_posterior_predictive(trace, samples=samples, model=model, size=size) assert ppc["density_dist"].shape == (samples, size) + obs.distribution.shape - # ppc = pm.fast_sample_posterior_predictive(trace, samples=samples, model=model, size=size) - # assert ppc['density_dist'].shape == (samples, size) + obs.distribution.shape - @pytest.mark.parametrize("shape", [(), (3,), (3, 2)], ids=str) def test_density_dist_with_random_sampleable_failure(self, shape): with pm.Model() as model: @@ -1325,9 +1225,6 @@ def test_density_dist_with_random_sampleable_failure(self, shape): with pytest.raises(RuntimeError): pm.sample_posterior_predictive(trace, samples=samples, model=model, size=100) - with pytest.raises((TypeError, RuntimeError)): - pm.fast_sample_posterior_predictive(trace, samples=samples, model=model, size=100) - @pytest.mark.parametrize("shape", [(), (3,), (3, 2)], ids=str) def test_density_dist_with_random_sampleable_hidden_error(self, shape): with pm.Model() as model: @@ -1349,10 +1246,6 @@ def test_density_dist_with_random_sampleable_hidden_error(self, shape): assert len(ppc["density_dist"]) == samples assert ((samples,) + obs.distribution.shape) != ppc["density_dist"].shape - ppc = pm.fast_sample_posterior_predictive(trace, samples=samples, model=model) - assert len(ppc["density_dist"]) == samples - assert ((samples,) + obs.distribution.shape) != ppc["density_dist"].shape - def test_density_dist_with_random_sampleable_handcrafted_success(self): with pm.Model() as model: mu = pm.Normal("mu", 0, 1) @@ -1390,9 +1283,6 @@ def test_density_dist_with_random_sampleable_handcrafted_success_fast(self): samples = 500 size = 100 - ppc = pm.fast_sample_posterior_predictive(trace, samples=samples, model=model, size=size) - assert ppc["density_dist"].shape == (samples, size) + obs.distribution.shape - def test_density_dist_without_random_not_sampleable(self): with pm.Model() as model: mu = pm.Normal("mu", 0, 1) @@ -1404,9 +1294,6 @@ def test_density_dist_without_random_not_sampleable(self): with pytest.raises(ValueError): pm.sample_posterior_predictive(trace, samples=samples, model=model, size=100) - with pytest.raises((TypeError, ValueError)): - pm.fast_sample_posterior_predictive(trace, samples=samples, model=model, size=100) - class TestNestedRandom(SeededTest): def build_model(self, distribution, shape, nested_rvs_info): diff --git a/pymc3/tests/test_distributions_timeseries.py b/pymc3/tests/test_distributions_timeseries.py index b1401bd90e1..63c993c3ace 100644 --- a/pymc3/tests/test_distributions_timeseries.py +++ b/pymc3/tests/test_distributions_timeseries.py @@ -18,11 +18,7 @@ from pymc3.distributions.continuous import Flat, Normal from pymc3.distributions.timeseries import AR, AR1, GARCH11, EulerMaruyama from pymc3.model import Model -from pymc3.sampling import ( - fast_sample_posterior_predictive, - sample, - sample_posterior_predictive, -) +from pymc3.sampling import sample, sample_posterior_predictive from pymc3.tests.helpers import select_by_precision from pymc3.theanof import floatX @@ -160,12 +156,9 @@ def test_linear(): trace = 
sample(init="advi+adapt_diag", chains=1) ppc = sample_posterior_predictive(trace, model=model) - ppcf = fast_sample_posterior_predictive(trace, model=model) - # test + p95 = [2.5, 97.5] lo, hi = np.percentile(trace[lamh], p95, axis=0) assert (lo < lam) and (lam < hi) lo, hi = np.percentile(ppc["zh"], p95, axis=0) assert ((lo < z) * (z < hi)).mean() > 0.95 - lo, hi = np.percentile(ppcf["zh"], p95, axis=0) - assert ((lo < z) * (z < hi)).mean() > 0.95 diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index ccedda74209..ae8a9fabc4b 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -339,7 +339,7 @@ def test_theano_switch_broadcast_edge_cases(self): @pytest.mark.xfail(reason="DensityDist not supported") def test_multiple_observed_rv(): - "Test previously buggy MultiObservedRV comparison code." + "Test previously buggy multi-observed RV comparison code." y1_data = np.random.randn(10) y2_data = np.random.randn(100) with pm.Model() as model: diff --git a/pymc3/tests/test_ndarray_backend.py b/pymc3/tests/test_ndarray_backend.py index 1b13aa0b0f2..75e027d2443 100644 --- a/pymc3/tests/test_ndarray_backend.py +++ b/pymc3/tests/test_ndarray_backend.py @@ -271,7 +271,6 @@ def test_sample_posterior_predictive(self, tmpdir_factory): np.random.seed(seed) with TestSaveLoad.model(): ppc = pm.sample_posterior_predictive(self.trace) - ppcf = pm.fast_sample_posterior_predictive(self.trace) seed = 10 np.random.seed(seed) @@ -282,6 +281,3 @@ def test_sample_posterior_predictive(self, tmpdir_factory): for key, value in ppc.items(): assert (value == ppc2[key]).all() - - for key, value in ppcf.items(): - assert (value == ppc2f[key]).all() diff --git a/pymc3/tests/test_random.py b/pymc3/tests/test_random.py deleted file mode 100644 index 7a4ae42ce22..00000000000 --- a/pymc3/tests/test_random.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright 2020 The PyMC Developers -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import numpy as np -import numpy.testing as npt -import pytest -import theano -import theano.tensor as tt - -from numpy import random as nr - -import pymc3 as pm - -from pymc3.distributions.distribution import _draw_value, draw_values -from pymc3.tests.helpers import SeededTest - - -def test_draw_value(): - npt.assert_equal(_draw_value(np.array([5, 6])), [5, 6]) - npt.assert_equal(_draw_value(np.array(5.0)), 5) - - npt.assert_equal(_draw_value(tt.constant([5.0, 6.0])), [5, 6]) - assert _draw_value(tt.constant(5)) == 5 - npt.assert_equal(_draw_value(2 * tt.constant([5.0, 6.0])), [10, 12]) - - val = theano.shared(np.array([5.0, 6.0])) - npt.assert_equal(_draw_value(val), [5, 6]) - npt.assert_equal(_draw_value(2 * val), [10, 12]) - - a = tt.scalar("a") - a.tag.test_value = 6 - npt.assert_equal(_draw_value(2 * a, givens=[(a, 1)]), 2) - - assert _draw_value(5) == 5 - assert _draw_value(5.0) == 5 - assert isinstance(_draw_value(5.0), type(5.0)) - assert isinstance(_draw_value(5), type(5)) - - with pm.Model(): - mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) - a = pm.Normal("a", mu=mu, sigma=5, shape=2) - - val1 = _draw_value(a) - val2 = _draw_value(a) - assert np.all(val1 != val2) - - with pytest.raises(ValueError) as err: - _draw_value([]) - err.match("Unexpected type") - - -class TestDrawValues: - def test_empty(self): - assert draw_values([]) == [] - - def test_vals(self): - npt.assert_equal(draw_values([np.array([5, 6])])[0], [5, 6]) - npt.assert_equal(draw_values([np.array(5.0)])[0], 5) - - npt.assert_equal(draw_values([tt.constant([5.0, 6.0])])[0], [5, 6]) - assert draw_values([tt.constant(5)])[0] == 5 - npt.assert_equal(draw_values([2 * tt.constant([5.0, 6.0])])[0], [10, 12]) - - val = theano.shared(np.array([5.0, 6.0])) - npt.assert_equal(draw_values([val])[0], [5, 6]) - npt.assert_equal(draw_values([2 * val])[0], [10, 12]) - - def test_simple_model(self): - with pm.Model(): - mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) - a = pm.Normal("a", mu=mu, sigma=5, shape=2) - - val1 = draw_values([a]) - val2 = draw_values([a]) - assert np.all(val1[0] != val2[0]) - - point = {"a": np.array([3.0, 4.0])} - npt.assert_equal(draw_values([a], point=point), [point["a"]]) - - def test_dep_vars(self): - with pm.Model(): - mu = 2 * tt.constant(np.array([5.0, 6.0])) + theano.shared(np.array(5)) - sd = pm.HalfNormal("sd", shape=2) - tau = 1 / sd ** 2 - a = pm.Normal("a", mu=mu, tau=tau, shape=2) - - point = {"a": np.array([1.0, 2.0])} - npt.assert_equal(draw_values([a], point=point), [point["a"]]) - - val1 = draw_values([a])[0] - val2 = draw_values([a], point={"sd": np.array([2.0, 3.0])})[0] - val3 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0] - val4 = draw_values([a], point={"sd_log__": np.array([2.0, 3.0])})[0] - - assert all( - [ - np.all(val1 != val2), - np.all(val1 != val3), - np.all(val1 != val4), - np.all(val2 != val3), - np.all(val2 != val4), - np.all(val3 != val4), - ] - ) - - def test_graph_constant(self): - # Issue 3595 pointed out that slice(None) can introduce - # theano.graph.basic.Constant into the compute graph, which wasn't - # handled correctly by draw_values - n_d = 500 - n_x = 2 - n_y = 1 - n_g = 10 - g = np.random.randint(0, n_g, (n_d,)) # group - x = np.random.randint(0, n_x, (n_d,)) # x factor - with pm.Model(): - multi_dim_rv = pm.Normal("multi_dim_rv", mu=0, sd=1, shape=(n_x, n_g, n_y)) - indexed_rv = multi_dim_rv[x, g, :] - i = draw_values([indexed_rv]) - assert i is not None - - -class 
TestJointDistributionDrawValues(SeededTest): - def test_joint_distribution(self): - with pm.Model() as model: - a = pm.Normal("a", mu=0, sigma=100) - b = pm.Normal("b", mu=a, sigma=1e-8) - c = pm.Normal("c", mu=a, sigma=1e-8) - d = pm.Deterministic("d", b + c) - - # Expected RVs - N = 1000 - norm = np.random.randn(3, N) - eA = norm[0] * 100 - eB = eA + norm[1] * 1e-8 - eC = eA + norm[2] * 1e-8 - eD = eB + eC - - # Drawn RVs - nr.seed(self.random_seed) - # A, B, C, D = list(zip(*[draw_values([a, b, c, d]) for i in range(N)])) - A, B, C, D = draw_values([a, b, c, d], size=N) - A = np.array(A).flatten() - B = np.array(B).flatten() - C = np.array(C).flatten() - D = np.array(D).flatten() - - # Assert that the drawn samples match the expected values - assert np.allclose(eA, A) - assert np.allclose(eB, B) - assert np.allclose(eC, C) - assert np.allclose(eD, D) - - # Assert that A, B and C have the expected difference - assert np.all(np.abs(A - B) < 1e-6) - assert np.all(np.abs(A - C) < 1e-6) - assert np.all(np.abs(B - C) < 1e-6) - - # Marginal draws - mA = np.array([draw_values([a]) for i in range(N)]).flatten() - mB = np.array([draw_values([b]) for i in range(N)]).flatten() - mC = np.array([draw_values([c]) for i in range(N)]).flatten() - # Also test the with model context of draw_values - with model: - mD = np.array([draw_values([d]) for i in range(N)]).flatten() - - # Assert that the marginal distributions have different sample values - assert not np.all(np.abs(B - mB) < 1e-2) - assert not np.all(np.abs(C - mC) < 1e-2) - assert not np.all(np.abs(D - mD) < 1e-2) - - # Assert that the marginal distributions do not have high cross - # correlation - assert np.abs(np.corrcoef(mA, mB)[0, 1]) < 0.1 - assert np.abs(np.corrcoef(mA, mC)[0, 1]) < 0.1 - assert np.abs(np.corrcoef(mB, mC)[0, 1]) < 0.1 diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py index 3b14f11b04c..db816b2f553 100644 --- a/pymc3/tests/test_sampling.py +++ b/pymc3/tests/test_sampling.py @@ -404,27 +404,20 @@ def test_normal_scalar(self): with model: # test list input ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10) - ppc0 = pm.fast_sample_posterior_predictive([model.test_point], samples=10) # deprecated argument is not introduced to fast version [2019/08/20:rpg] ppc = pm.sample_posterior_predictive(trace, var_names=["a"]) # test empty ppc ppc = pm.sample_posterior_predictive(trace, var_names=[]) assert len(ppc) == 0 - ppc = pm.fast_sample_posterior_predictive(trace, var_names=[]) - assert len(ppc) == 0 # test keep_size parameter ppc = pm.sample_posterior_predictive(trace, keep_size=True) assert ppc["a"].shape == (nchains, ndraws) - ppc = pm.fast_sample_posterior_predictive(trace, keep_size=True) - assert ppc["a"].shape == (nchains, ndraws) # test keep_size parameter and idata input idata = az.from_pymc3(trace) ppc = pm.sample_posterior_predictive(idata, keep_size=True) assert ppc["a"].shape == (nchains, ndraws) - ppc = pm.fast_sample_posterior_predictive(trace, keep_size=True) - assert ppc["a"].shape == (nchains, ndraws) # test default case ppc = pm.sample_posterior_predictive(trace, var_names=["a"]) @@ -434,14 +427,6 @@ def test_normal_scalar(self): _, pval = stats.kstest(ppc["a"] - trace["mu"], stats.norm(loc=0, scale=1).cdf) assert pval > 0.001 - # test default case - ppc = pm.fast_sample_posterior_predictive(trace, var_names=["a"]) - assert "a" in ppc - assert ppc["a"].shape == (nchains * ndraws,) - # mu's standard deviation may have changed thanks to a's observed - _, pval = 
stats.kstest(ppc["a"] - trace["mu"], stats.norm(loc=0, scale=1).cdf) - assert pval > 0.001 - # size argument not introduced to fast version [2019/08/20:rpg] with model: ppc = pm.sample_posterior_predictive(trace, size=5, var_names=["a"]) @@ -459,11 +444,6 @@ def test_normal_vector(self, caplog): ppc = pm.sample_posterior_predictive(trace, samples=12, var_names=[]) assert len(ppc) == 0 - # test list input - ppc0 = pm.fast_sample_posterior_predictive([model.test_point], samples=10) - ppc = pm.fast_sample_posterior_predictive(trace, samples=12, var_names=[]) - assert len(ppc) == 0 - # test keep_size parameter ppc = pm.sample_posterior_predictive(trace, keep_size=True) assert ppc["a"].shape == (trace.nchains, len(trace), 2) @@ -481,22 +461,6 @@ def test_normal_vector(self, caplog): assert "a" in ppc assert ppc["a"].shape == (12, 2) - # test keep_size parameter - ppc = pm.fast_sample_posterior_predictive(trace, keep_size=True) - assert ppc["a"].shape == (trace.nchains, len(trace), 2) - with pytest.warns(UserWarning): - ppc = pm.fast_sample_posterior_predictive(trace, samples=12, var_names=["a"]) - assert "a" in ppc - assert ppc["a"].shape == (12, 2) - - # test keep_size parameter with inference data as input - ppc = pm.fast_sample_posterior_predictive(idata, keep_size=True) - assert ppc["a"].shape == (trace.nchains, len(trace), 2) - with pytest.warns(UserWarning): - ppc = pm.fast_sample_posterior_predictive(trace, samples=12, var_names=["a"]) - assert "a" in ppc - assert ppc["a"].shape == (12, 2) - # size unsupported by fast_ version argument. [2019/08/19:rpg] ppc = pm.sample_posterior_predictive(trace, samples=10, var_names=["a"], size=4) assert "a" in ppc @@ -511,10 +475,7 @@ def test_exceptions(self, caplog): with model: with pytest.raises(IncorrectArgumentsError): ppc = pm.sample_posterior_predictive(trace, samples=10, keep_size=True) - with pytest.raises(IncorrectArgumentsError): - ppc = pm.fast_sample_posterior_predictive(trace, samples=10, keep_size=True) - # Not for fast_sample_posterior_predictive with pytest.raises(IncorrectArgumentsError): ppc = pm.sample_posterior_predictive(trace, size=4, keep_size=True) @@ -522,8 +483,6 @@ def test_exceptions(self, caplog): bad_trace = {"mu": stats.norm.rvs(size=1000)} with pytest.raises(TypeError): ppc = pm.sample_posterior_predictive(bad_trace) - with pytest.raises(TypeError): - ppc = pm.fast_sample_posterior_predictive(bad_trace) def test_vector_observed(self): with pm.Model() as model: @@ -545,15 +504,6 @@ def test_vector_observed(self): assert "a" in ppc assert ppc["a"].shape == (10, 4, 2) - # now with fast version - # test list input - ppc0 = pm.fast_sample_posterior_predictive([model.test_point], samples=10) - ppc = pm.fast_sample_posterior_predictive(trace, samples=12, var_names=[]) - assert len(ppc) == 0 - ppc = pm.fast_sample_posterior_predictive(trace, samples=12, var_names=["a"]) - assert "a" in ppc - assert ppc["a"].shape == (12, 2) - def test_sum_normal(self): with pm.Model() as model: a = pm.Normal("a", sigma=0.2) @@ -571,16 +521,6 @@ def test_sum_normal(self): _, pval = stats.kstest(ppc["b"], stats.norm(scale=scale).cdf) assert pval > 0.001 - # test list input - ppc0 = pm.fast_sample_posterior_predictive([model.test_point], samples=10) - assert ppc0 == {} - ppc = pm.fast_sample_posterior_predictive(trace, samples=1000, var_names=["b"]) - assert len(ppc) == 1 - assert ppc["b"].shape == (1000,) - scale = np.sqrt(1 + 0.2 ** 2) - _, pval = stats.kstest(ppc["b"], stats.norm(scale=scale).cdf) - assert pval > 0.001 - def 
test_model_not_drawable_prior(self): data = np.random.poisson(lam=10, size=200) model = pm.Model() @@ -596,9 +536,6 @@ def test_model_not_drawable_prior(self): samples = pm.sample_posterior_predictive(trace, 40) assert samples["foo"].shape == (40, 200) - samples = pm.fast_sample_posterior_predictive(trace, 40) - assert samples["foo"].shape == (40, 200) - def test_model_shared_variable(self): x = np.random.randn(100) y = x > 0 @@ -624,17 +561,6 @@ def test_model_shared_variable(self): assert post_pred["obs"].shape == (samples, 3) npt.assert_allclose(post_pred["p"], expected_p) - # fast version - samples = 100 - with model: - post_pred = pm.fast_sample_posterior_predictive( - trace, samples=samples, var_names=["p", "obs"] - ) - - expected_p = np.array([logistic.eval({coeff: val}) for val in trace["x"][:samples]]) - assert post_pred["obs"].shape == (samples, 3) - npt.assert_allclose(post_pred["p"], expected_p) - def test_deterministic_of_observed(self): meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(10)) meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(10)) @@ -664,16 +590,6 @@ def test_deterministic_of_observed(self): npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) - np.random.seed(0) - ppc = pm.fast_sample_posterior_predictive( - model=model, - trace=trace, - samples=len(trace) * nchains, - var_names=[var.name for var in (model.deterministics + model.basic_RVs)], - ) - - npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) - def test_deterministic_of_observed_modified_interface(self): meas_in_1 = pm.theanof.floatX(2 + 4 * np.random.randn(100)) meas_in_2 = pm.theanof.floatX(5 + 4 * np.random.randn(100)) @@ -702,16 +618,6 @@ def test_deterministic_of_observed_modified_interface(self): rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3 npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) - ppc = pm.fast_sample_posterior_predictive( - model=model, - trace=ppc_trace, - samples=len(ppc_trace), - var_names=[x.name for x in (model.deterministics + model.basic_RVs)], - ) - - rtol = 1e-5 if theano.config.floatX == "float64" else 1e-3 - npt.assert_allclose(ppc["in_1"] + ppc["in_2"], ppc["out"], rtol=rtol) - def test_variable_type(self): with pm.Model() as model: mu = pm.HalfNormal("mu", 1) @@ -736,9 +642,6 @@ def test_potentials_warning(self): with pytest.warns(UserWarning, match=warning_msg): pm.sample_posterior_predictive(trace, samples=5) - with pytest.warns(UserWarning, match=warning_msg): - pm.fast_sample_posterior_predictive(trace, samples=5) - class TestSamplePPCW(SeededTest): def test_sample_posterior_predictive_w(self): @@ -947,9 +850,6 @@ def test_multivariate2(self): assert sim_priors["obs"].shape == (20,) + obs.distribution.shape assert sim_ppc["obs"].shape == (20,) + obs.distribution.shape - sim_ppc = pm.fast_sample_posterior_predictive(burned_trace, samples=20, model=dm_model) - assert sim_ppc["obs"].shape == (20,) + obs.distribution.shape - def test_layers(self): with pm.Model() as model: a = pm.Uniform("a", lower=0, upper=1, shape=10) @@ -1052,11 +952,6 @@ def test_potentials_warning(self): class TestSamplePosteriorPredictive: - def test_point_list_arg_bug_fspp(self, point_list_arg_bug_fixture): - pmodel, trace = point_list_arg_bug_fixture - with pmodel: - pp = pm.fast_sample_posterior_predictive([trace[15]], var_names=["d"]) - def test_point_list_arg_bug_spp(self, point_list_arg_bug_fixture): pmodel, trace = point_list_arg_bug_fixture with pmodel: @@ -1076,9 +971,3 @@ def 
test_sample_from_xarray_posterior(self, point_list_arg_bug_fixture): idat = az.from_pymc3(trace) with pmodel: pp = pm.sample_posterior_predictive(idat.posterior, var_names=["d"]) - - def test_sample_from_xarray_posterior_fast(self, point_list_arg_bug_fixture): - pmodel, trace = point_list_arg_bug_fixture - idat = az.from_pymc3(trace) - with pmodel: - pp = pm.fast_sample_posterior_predictive(idat.posterior, var_names=["d"]) diff --git a/pymc3/tests/test_shared.py b/pymc3/tests/test_shared.py index 723216362fb..8b0d6750836 100644 --- a/pymc3/tests/test_shared.py +++ b/pymc3/tests/test_shared.py @@ -43,19 +43,15 @@ def test_sample(self): trace = pm.sample(1000, init=None, tune=1000, chains=1) pp_trace0 = pm.sample_posterior_predictive(trace, 1000) - pp_trace01 = pm.fast_sample_posterior_predictive(trace, 1000) x_shared.set_value(x_pred) prior_trace1 = pm.sample_prior_predictive(1000) pp_trace1 = pm.sample_posterior_predictive(trace, 1000) - pp_trace11 = pm.fast_sample_posterior_predictive(trace, 1000) assert prior_trace0["b"].shape == (1000,) assert prior_trace0["obs"].shape == (1000, 100) np.testing.assert_allclose(x, pp_trace0["obs"].mean(axis=0), atol=1e-1) - np.testing.assert_allclose(x, pp_trace01["obs"].mean(axis=0), atol=1e-1) assert prior_trace1["b"].shape == (1000,) assert prior_trace1["obs"].shape == (1000, 200) np.testing.assert_allclose(x_pred, pp_trace1["obs"].mean(axis=0), atol=1e-1) - np.testing.assert_allclose(x_pred, pp_trace11["obs"].mean(axis=0), atol=1e-1) diff --git a/pymc3/tests/test_variational_inference.py b/pymc3/tests/test_variational_inference.py index 1ef9b616290..2df45cd33d5 100644 --- a/pymc3/tests/test_variational_inference.py +++ b/pymc3/tests/test_variational_inference.py @@ -208,7 +208,8 @@ def parametric_grouped_approxes(request): @pytest.fixture def three_var_aevb_groups(parametric_grouped_approxes, three_var_model, aevb_initial): - dsize = np.prod(pymc3.util.get_transformed(three_var_model.one).dshape[1:]) + # XXX: This needs to be refactored + dsize = None # np.prod(pymc3.util.get_transformed(three_var_model.one).dshape[1:]) cls, kw = parametric_grouped_approxes spec = cls.get_param_spec_for(d=dsize, **kw) params = dict() diff --git a/pymc3/util.py b/pymc3/util.py index f4be2bf391d..c3572d362d2 100644 --- a/pymc3/util.py +++ b/pymc3/util.py @@ -170,7 +170,7 @@ def get_repr_for_variable(variable, formatting="plain"): def get_var_name(var): """Get an appropriate, plain variable name for a variable. Necessary because we override theano.tensor.TensorVariable.__str__ to give informative - string representations to our pymc3.PyMC3Variables, yet we want to use the + string representations to our TensorVariables, yet we want to use the plain name as e.g. keys in dicts. 
""" if isinstance(var, TensorVariable): diff --git a/pymc3/variational/approximations.py b/pymc3/variational/approximations.py index 5da02e50f69..cbee4463dd9 100644 --- a/pymc3/variational/approximations.py +++ b/pymc3/variational/approximations.py @@ -260,7 +260,7 @@ def create_shared_params(self, trace=None, size=None, jitter=1, start=None): def _check_trace(self): trace = self._kwargs.get("trace", None) if trace is not None and not all([var.name in trace.varnames for var in self.group]): - raise ValueError("trace has not all FreeRV in the group") + raise ValueError("trace has not all free RVs in the group") def randidx(self, size=None): if size is None: diff --git a/pymc3/variational/inference.py b/pymc3/variational/inference.py index 85eb08e65c0..83719e1beae 100644 --- a/pymc3/variational/inference.py +++ b/pymc3/variational/inference.py @@ -166,7 +166,8 @@ def _iterate_without_loss(self, s, _, step_func, progress, callbacks): if np.isnan(current_param).any(): name_slc = [] tmp_hold = list(range(current_param.size)) - vmap = self.approx.groups[0].bij.ordering.vmap + # XXX: This needs to be refactored + vmap = None # self.approx.groups[0].bij.ordering.vmap for vmap_ in vmap: slclen = len(tmp_hold[vmap_.slc]) for j in range(slclen): @@ -215,7 +216,8 @@ def _infmean(input_array): current_param = self.approx.params[0].get_value() name_slc = [] tmp_hold = list(range(current_param.size)) - vmap = self.approx.groups[0].bij.ordering.vmap + # XXX: This needs to be refactored + vmap = None # self.approx.groups[0].bij.ordering.vmap for vmap_ in vmap: slclen = len(tmp_hold[vmap_.slc]) for j in range(slclen): diff --git a/pymc3/variational/opvi.py b/pymc3/variational/opvi.py index 7764d356053..4511f78efba 100644 --- a/pymc3/variational/opvi.py +++ b/pymc3/variational/opvi.py @@ -56,7 +56,6 @@ import pymc3 as pm from pymc3.backends import NDArray -from pymc3.blocking import ArrayOrdering, VarMap from pymc3.memoize import WithMemoization, memoize from pymc3.model import modelcontext from pymc3.theanof import identity, tt_rng @@ -953,7 +952,7 @@ def __init_group__(self, group): self.group = [get_transformed(var) for var in self.group] # XXX: This needs to be refactored - self.ordering = ArrayOrdering([]) + # self.ordering = ArrayOrdering([]) self.replacements = dict() for var in self.group: if isinstance(var.distribution, pm.Discrete): @@ -965,18 +964,24 @@ def __init_group__(self, group): raise LocalGroupError("Local variable should not be scalar") else: raise BatchedGroupError("Batched variable should not be scalar") - self.ordering.size += (np.prod(var.dshape[1:])).astype(int) + # XXX: This needs to be refactored + # self.ordering.size += None # (np.prod(var.dshape[1:])).astype(int) if self.local: - shape = (-1,) + var.dshape[1:] + # XXX: This needs to be refactored + shape = None # (-1,) + var.dshape[1:] else: - shape = var.dshape + # XXX: This needs to be refactored + shape = None # var.dshape else: - self.ordering.size += var.dsize - shape = var.dshape - end = self.ordering.size - vmap = VarMap(var.name, slice(begin, end), shape, var.dtype) - self.ordering.vmap.append(vmap) - self.ordering.by_name[vmap.var] = vmap + # XXX: This needs to be refactored + # self.ordering.size += None # var.dsize + # XXX: This needs to be refactored + shape = None # var.dshape + # end = self.ordering.size + # XXX: This needs to be refactored + vmap = None # VarMap(var.name, slice(begin, end), shape, var.dtype) + # self.ordering.vmap.append(vmap) + # self.ordering.by_name[vmap.var] = vmap vr = self.input[..., 
vmap.slc].reshape(shape).astype(vmap.dtyp) vr.name = vmap.var + "_vi_replacement" self.replacements[var] = vr @@ -1031,7 +1036,8 @@ def _new_initial_shape(self, size, dim, more_replacements=None): def bdim(self): if not self.local: if self.batched: - return self.ordering.vmap[0].shp[0] + # XXX: This needs to be refactored + return None # self.ordering.vmap[0].shp[0] else: return 1 else: @@ -1039,11 +1045,13 @@ def bdim(self): @node_property def ndim(self): - return self.ordering.size * self.bdim + # XXX: This needs to be refactored + return None # self.ordering.size * self.bdim @property def ddim(self): - return self.ordering.size + # XXX: This needs to be refactored + return None # self.ordering.size def _new_initial(self, size, deterministic, more_replacements=None): """*Dev* - allocates new initial random generator @@ -1286,7 +1294,7 @@ def __init__(self, groups, model=None): self._scale_cost_to_minibatch = theano.shared(np.int8(1)) model = modelcontext(model) if not model.free_RVs: - raise TypeError("Model does not have FreeRVs") + raise TypeError("Model does not have any free RVs") self.groups = list() seen = set() rest = None
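
Migration note for downstream code: with ``fast_sample_posterior_predictive`` (and the ``draw_values``/``generate_samples`` machinery behind it) removed, posterior predictive sampling goes through ``pm.sample_posterior_predictive`` alone, which is exactly how the tests above were updated. A minimal sketch of that change follows; the model, data, and variable names here are illustrative only and are not taken from the patch.

.. code:: python

    import numpy as np
    import pymc3 as pm

    observed = np.random.randn(100)

    with pm.Model() as model:
        mu = pm.Normal("mu", 0.0, 1.0)
        pm.Normal("obs", mu=mu, sigma=1.0, observed=observed)
        trace = pm.sample(500, tune=500, chains=1)

        # Before this patch:
        #   ppc = pm.fast_sample_posterior_predictive(trace, var_names=["obs"])
        # After this patch:
        ppc = pm.sample_posterior_predictive(trace, var_names=["obs"])

    # One predictive draw per posterior sample, matching the observed shape.
    assert ppc["obs"].shape[-1] == observed.shape[0]

``sample_posterior_predictive`` already accepts the ``trace``/InferenceData, ``var_names``, ``samples``, and ``keep_size`` arguments exercised in the updated tests, so most call sites should only need the function name changed.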