Skip to content

Commit

Permalink
Implement naive RandomVariable-based posterior predictive sampling
Browse files Browse the repository at this point in the history
The approach currently being used is rather inefficient.  Instead, we should
change the `size` parameters for `RandomVariable` terms in the sample-space
graph(s) so that they match arrays of the inputs in the trace and the desired
number of output samples.  This would allow the compiled graph to vectorize
operations (when it can) and sample variables more efficiently in large batches.
  • Loading branch information
brandonwillard committed Feb 5, 2021
1 parent c377b13 commit dc758cc
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 11 deletions.
9 changes: 4 additions & 5 deletions pymc3/distributions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,10 @@ def strip_observed(x: TensorVariable) -> TensorVariable:
def sample_to_measure_vars(graphs: List[TensorVariable]) -> List[TensorVariable]:
"""Replace `RandomVariable` terms in graphs with their measure-space counterparts."""
replace = {}
for anc in ancestors(graphs):
if anc.owner and isinstance(anc.owner.op, RandomVariable):
measure_var = getattr(anc.tag, "value_var", None)
if measure_var is not None:
replace[anc] = measure_var
for anc in rv_ancestors(graphs):
measure_var = getattr(anc.tag, "value_var", None)
if measure_var is not None:
replace[anc] = measure_var

dist_params = clone_replace(graphs, replace=replace)
return dist_params
Expand Down
29 changes: 28 additions & 1 deletion pymc3/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from pymc3.backends.base import BaseTrace, MultiTrace
from pymc3.backends.ndarray import NDArray
from pymc3.blocking import DictToArrayBijection
from pymc3.distributions import change_rv_size, rv_ancestors, strip_observed
from pymc3.distributions.distribution import draw_values
from pymc3.distributions.posterior_predictive import fast_sample_posterior_predictive
from pymc3.exceptions import IncorrectArgumentsError, SamplingError
Expand Down Expand Up @@ -1715,6 +1716,31 @@ def sample_posterior_predictive(
if progressbar:
indices = progress_bar(indices, total=samples, display=progressbar)

vars_to_sample = [
strip_observed(v) for v in get_default_varnames(vars_, include_transformed=False)
]

if not vars_to_sample:
return {}

if not hasattr(_trace, "varnames"):
inputs_and_names = [(i, i.name) for i in rv_ancestors(vars_to_sample)]
inputs, input_names = zip(*inputs_and_names)
else:
input_names = _trace.varnames
inputs = [model[n] for n in _trace.varnames]

if size is not None:
vars_to_sample = [change_rv_size(v, size, expand=True) for v in vars_to_sample]

sampler_fn = theano.function(
inputs,
vars_to_sample,
allow_input_downcast=True,
accept_inplace=True,
on_unused_input="ignore",
)

ppc_trace_t = _DefaultTrace(samples)
try:
for idx in indices:
Expand All @@ -1731,7 +1757,8 @@ def sample_posterior_predictive(
else:
param = _trace[idx % len_trace]

values = draw_values(vars_, point=param, size=size)
values = sampler_fn(*(param[n] for n in input_names))

for k, v in zip(vars_, values):
ppc_trace_t.insert(k.name, v, idx)
except KeyboardInterrupt:
Expand Down
12 changes: 7 additions & 5 deletions pymc3/tests/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def test_exceptions(self, caplog):
with pm.Model() as model:
mu = pm.Normal("mu", 0.0, 1.0)
a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.5, 0.2]))
trace = pm.sample()
trace = pm.sample(idata_kwargs={"log_likelihood": False})

with model:
with pytest.raises(IncorrectArgumentsError):
Expand All @@ -517,6 +517,7 @@ def test_exceptions(self, caplog):
# Not for fast_sample_posterior_predictive
with pytest.raises(IncorrectArgumentsError):
ppc = pm.sample_posterior_predictive(trace, size=4, keep_size=True)

# test wrong type argument
bad_trace = {"mu": stats.norm.rvs(size=1000)}
with pytest.raises(TypeError):
Expand All @@ -528,13 +529,14 @@ def test_vector_observed(self):
with pm.Model() as model:
mu = pm.Normal("mu", mu=0, sigma=1)
a = pm.Normal("a", mu=mu, sigma=1, observed=np.array([0.0, 1.0]))
trace = pm.sample()
trace = pm.sample(idata_kwargs={"log_likelihood": False})

with model:
# test list input
ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
ppc = pm.sample_posterior_predictive(trace, samples=12, var_names=[])
assert len(ppc) == 0
# ppc0 = pm.sample_posterior_predictive([model.test_point], samples=10)
# TODO: Assert something about the output
# ppc = pm.sample_posterior_predictive(trace, samples=12, var_names=[])
# assert len(ppc) == 0
ppc = pm.sample_posterior_predictive(trace, samples=12, var_names=["a"])
assert "a" in ppc
assert ppc["a"].shape == (12, 2)
Expand Down

0 comments on commit dc758cc

Please sign in to comment.