From 1f716ba36b450c184503f760f4e75f93c784dfec Mon Sep 17 00:00:00 2001
From: "Brandon T. Willard"
Date: Fri, 29 Jan 2021 00:16:32 -0600
Subject: [PATCH] Remove shape dependencies from DictToArrayBijection

This commit changes `DictToArrayBijection` so that it returns a
`RaveledVars` datatype that contains the original raveled and concatenated
vector, along with the information needed to map it back to
dictionary/variable form.

Simply put, the variables-to-single-vector mapping steps have been pushed
away from the model object and its symbolic terms, and closer to the
(sampling) processes that produce and work with `ndarray` values for said
terms.  In doing so, we can operate under fewer unnecessarily strong
assumptions (e.g. that the shapes of each term are static and equal to the
initial test points), and let the sampling processes that require
vector-only steps deal with any changes in the mappings.
---
 pymc3/blocking.py                       | 193 ++++--------------------
 pymc3/distributions/discrete.py         |  16 +-
 pymc3/model.py                          | 144 +++++-------------
 pymc3/parallel_sampling.py              |   4 +
 pymc3/sampling.py                       |  44 +++---
 pymc3/smc/smc.py                        |   4 +-
 pymc3/step_methods/arraystep.py         |  81 +++++-----
 pymc3/step_methods/compound.py          |   7 -
 pymc3/step_methods/hmc/base_hmc.py      |  19 ++-
 pymc3/step_methods/hmc/integration.py   |  23 ++-
 pymc3/step_methods/hmc/nuts.py          |  23 +--
 pymc3/step_methods/hmc/quadpotential.py |  42 +++---
 pymc3/tests/test_distributions.py       |  13 +-
 pymc3/tests/test_model.py               | 109 --------------
 pymc3/tests/test_model_func.py          |  16 --
 pymc3/theanof.py                        |  15 +-
 pymc3/tuning/scaling.py                 |   9 +-
 pymc3/tuning/starting.py                |  10 +-
 18 files changed, 229 insertions(+), 543 deletions(-)
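Notes: the heart of the change is the `RaveledVars` round trip implemented in
`pymc3/blocking.py` below. A minimal, self-contained sketch of its semantics
follows; `dict_to_raveled` and `raveled_to_dict` are illustrative stand-ins
for `DictToArrayBijection.map` and `DictToArrayBijection.rmap`, not names
introduced by this patch:

    import collections

    import numpy as np

    RaveledVars = collections.namedtuple("RaveledVars", "data, point_map_info")

    def dict_to_raveled(point):
        # Ravel each value and record (name, shape, dtype), so the single
        # vector can be unraveled later without consulting the model.
        info = tuple((name, v.shape, v.dtype) for name, v in point.items())
        data = np.concatenate([v.ravel() for v in point.values()])
        return RaveledVars(data, info)

    def raveled_to_dict(rvars):
        # Walk point_map_info, slicing out each variable's flat segment.
        res, last_idx = {}, 0
        for name, shape, dtype in rvars.point_map_info:
            arr_len = np.prod(shape, dtype=int)
            res[name] = rvars.data[last_idx : last_idx + arr_len].reshape(shape).astype(dtype)
            last_idx += arr_len
        return res

    point = {"mu": np.zeros((2, 3)), "sigma": np.ones(4)}
    rvars = dict_to_raveled(point)
    assert rvars.data.shape == (10,)
    assert all(np.array_equal(v, point[k]) for k, v in raveled_to_dict(rvars).items())

Because the metadata travels with the vector, consumers no longer have to
assume that the shapes recorded at model creation time still hold.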
""" import collections -import copy -import numpy as np +from typing import Dict, List, Text, Union -from pymc3.util import get_var_name +import numpy as np -__all__ = ["ArrayOrdering", "DictToArrayBijection", "DictToVarBijection"] +__all__ = ["ArrayOrdering", "DictToArrayBijection"] +RaveledVars = collections.namedtuple("RaveledVars", "data, point_map_info") VarMap = collections.namedtuple("VarMap", "var, slc, shp, dtyp") DataMap = collections.namedtuple("DataMap", "list_ind, slc, shp, dtype, name") @@ -67,19 +67,11 @@ class DictToArrayBijection: A mapping between a dict space and an array space """ - def __init__(self, ordering, dpoint): + def __init__(self, ordering: List[Text]): + # TODO: Should just use `OrderedDict`s and remove this state entirely self.ordering = ordering - self.dpt = dpoint - # determine smallest float dtype that will fit all data - if all([x.dtyp == "float16" for x in ordering.vmap]): - self.array_dtype = "float16" - elif all([x.dtyp == "float32" for x in ordering.vmap]): - self.array_dtype = "float32" - else: - self.array_dtype = "float64" - - def map(self, dpt): + def map(self, dpt: Dict[Text, np.ndarray]): """ Maps value from dict space to array space @@ -87,12 +79,14 @@ def map(self, dpt): ---------- dpt: dict """ - apt = np.empty(self.ordering.size, dtype=self.array_dtype) - for var, slc, _, _ in self.ordering.vmap: - apt[slc] = dpt[var].ravel() - return apt - - def rmap(self, apt): + vars_info = tuple((dpt[n], n, dpt[n].shape, dpt[n].dtype) for n in self.ordering) + res = np.concatenate([v[0].ravel() for v in vars_info]) + return RaveledVars(res, tuple(v[1:] for v in vars_info)) + + @classmethod + def rmap( + cls, apt: RaveledVars, as_list=False + ) -> Union[Dict[Text, np.ndarray], List[np.ndarray]]: """ Maps value from array space to dict space @@ -100,12 +94,25 @@ def rmap(self, apt): ---------- apt: array """ - dpt = self.dpt.copy() + if as_list: + res = [] + else: + res = {} + + if not isinstance(apt, RaveledVars): + raise TypeError("`apt` must be a `RaveledVars` type") - for var, slc, shp, dtyp in self.ordering.vmap: - dpt[var] = np.atleast_1d(apt)[slc].reshape(shp).astype(dtyp) + last_idx = 0 + for name, shape, dtype in apt.point_map_info: + arr_len = np.prod(shape, dtype=int) + var = apt.data[last_idx : last_idx + arr_len].reshape(shape).astype(dtype) + if as_list: + res.append(var) + else: + res[name] = var + last_idx += arr_len - return dpt + return res def mapf(self, f): """ @@ -123,142 +130,6 @@ def mapf(self, f): return Compose(f, self.rmap) -class ListArrayOrdering: - """ - An ordering for a list to an array space. Takes also non theano.tensors. - Modified from pymc3 blocking. 
- - Parameters - ---------- - list_arrays: list - :class:`numpy.ndarray` or :class:`theano.tensor.Tensor` - intype: str - defining the input type 'tensor' or 'numpy' - """ - - def __init__(self, list_arrays, intype="numpy"): - if intype not in {"tensor", "numpy"}: - raise ValueError("intype not in {'tensor', 'numpy'}") - self.vmap = [] - self.intype = intype - self.size = 0 - for array in list_arrays: - if self.intype == "tensor": - name = array.name - array = array.tag.test_value - else: - name = "numpy" - - slc = slice(self.size, self.size + array.size) - self.vmap.append(DataMap(len(self.vmap), slc, array.shape, array.dtype, name)) - self.size += array.size - - -class ListToArrayBijection: - """ - A mapping between a List of arrays and an array space - - Parameters - ---------- - ordering: :class:`ListArrayOrdering` - list_arrays: list - of :class:`numpy.ndarray` - """ - - def __init__(self, ordering, list_arrays): - self.ordering = ordering - self.list_arrays = list_arrays - - def fmap(self, list_arrays): - """ - Maps values from List space to array space - - Parameters - ---------- - list_arrays: list - of :class:`numpy.ndarray` - - Returns - ------- - array: :class:`numpy.ndarray` - single array comprising all the input arrays - """ - - array = np.empty(self.ordering.size) - for list_ind, slc, _, _, _ in self.ordering.vmap: - array[slc] = list_arrays[list_ind].ravel() - return array - - def dmap(self, dpt): - """ - Maps values from dict space to List space - - Parameters - ---------- - list_arrays: list - of :class:`numpy.ndarray` - - Returns - ------- - point - """ - a_list = copy.copy(self.list_arrays) - - for list_ind, _, _, _, var in self.ordering.vmap: - a_list[list_ind] = dpt[var].ravel() - - return a_list - - def rmap(self, array): - """ - Maps value from array space to List space - Inverse operation of fmap. - - Parameters - ---------- - array: :class:`numpy.ndarray` - - Returns - ------- - a_list: list - of :class:`numpy.ndarray` - """ - - a_list = copy.copy(self.list_arrays) - - for list_ind, slc, shp, dtype, _ in self.ordering.vmap: - a_list[list_ind] = np.atleast_1d(array)[slc].reshape(shp).astype(dtype) - - return a_list - - -class DictToVarBijection: - """ - A mapping between a dict space and the array space for one element within the dict space - """ - - def __init__(self, var, idx, dpoint): - self.var = get_var_name(var) - self.idx = idx - self.dpt = dpoint - - def map(self, dpt): - return dpt[self.var][self.idx] - - def rmap(self, apt): - dpt = self.dpt.copy() - - dvar = dpt[self.var].copy() - dvar[self.idx] = apt - - dpt[self.var] = dvar - - return dpt - - def mapf(self, f): - return Compose(f, self.rmap) - - class Compose: """ Compose two functions in a pickleable way diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py index b8bf2f13c6e..2555c54598c 100644 --- a/pymc3/distributions/discrete.py +++ b/pymc3/distributions/discrete.py @@ -1357,7 +1357,7 @@ def dist(cls, p, **kwargs): @_logp.register(CategoricalRV) -def categorical_logp(op, value, p_, upper): +def categorical_logp(op, value, p, upper): r""" Calculate log-probability of Categorical distribution at specified value. 
@@ -1371,19 +1371,17 @@ def categorical_logp(op, value, p_, upper): ------- TensorVariable """ - p_ = self.p - k = self.k # Clip values before using them for indexing - value_clip = tt.clip(value, 0, k - 1) + value_clip = tt.clip(value, 0, p.size - 1) - p = p_ / tt.sum(p_, axis=-1, keepdims=True) + p = p / tt.sum(p, axis=-1, keepdims=True) if p.ndim > 1: if p.ndim > value_clip.ndim: - value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim) + value_clip = tt.shape_padleft(value_clip, p.ndim - value_clip.ndim) elif p.ndim < value_clip.ndim: - p = tt.shape_padleft(p, value_clip.ndim - p_.ndim) + p = tt.shape_padleft(p, value_clip.ndim - p.ndim) pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1)) a = tt.log( take_along_axis( @@ -1394,7 +1392,9 @@ def categorical_logp(op, value, p_, upper): else: a = tt.log(p[value_clip]) - return bound(a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1)) + return bound( + a, value >= 0, value <= (p.size - 1), tt.all(p >= 0, axis=-1), tt.all(p <= 1, axis=-1) + ) class Constant(Discrete): diff --git a/pymc3/model.py b/pymc3/model.py index fe6175aa95c..85daf48cd21 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -35,11 +35,11 @@ import pymc3 as pm -from pymc3.blocking import ArrayOrdering, DictToArrayBijection +from pymc3.blocking import DictToArrayBijection, RaveledVars from pymc3.distributions import logpt, logpt_sum from pymc3.exceptions import ImputationWarning from pymc3.math import flatten_list -from pymc3.memoize import WithMemoization, memoize +from pymc3.memoize import WithMemoization from pymc3.theanof import floatX, generator, gradient, hessian, inputvars from pymc3.util import get_transformed_name, get_var_name from pymc3.vartypes import continuous_types, discrete_types, isgenerator, typefilter @@ -607,8 +607,6 @@ class ValueGradFunction: Attributes ---------- - size: int - The number of elements in the parameter array. profile: theano profiling object or None The profiling object of the theano function that computes value and gradient. 
        This is None unless `profile=True` was set in the
@@ -626,8 +624,6 @@ def __init__(
         compute_grads=True,
         **kwargs,
     ):
-        from pymc3.distributions import TensorType
-
         if extra_vars is None:
             extra_vars = []
@@ -657,9 +653,6 @@ def __init__(
                 raise ValueError("All costs must be scalar.")
             cost = cost + self._weights[i] * val
 
-        self._cost = cost
-        self._ordering = ArrayOrdering(grad_vars)
-        self.size = self._ordering.size
         self._extra_are_set = False
         for var in self._grad_vars:
             if not np.can_cast(var.dtype, self.dtype, casting):
@@ -677,27 +670,21 @@ def __init__(
         self._extra_vars_shared = {}
         for var in extra_vars:
             shared = theano.shared(var.tag.test_value, var.name + "_shared__")
-            # test TensorType compatibility
-            if hasattr(var.tag.test_value, "shape"):
-                testtype = TensorType(var.dtype, var.tag.test_value.shape)
-
-                if testtype != shared.type:
-                    shared.type = testtype
             self._extra_vars_shared[var.name] = shared
             givens.append((var, shared))
 
-        self._vars_joined, self._cost_joined = self._build_joined(
-            self._cost, grad_vars, self._ordering.vmap
-        )
+        self.bij = DictToArrayBijection([v.name for v in grad_vars])
 
         if compute_grads:
-            grad = tt.grad(self._cost_joined, self._vars_joined)
-            grad.name = "__grad"
-            outputs = [self._cost_joined, grad]
+            grads = tt.grad(cost, grad_vars)
+            for grad_wrt, var in zip(grads, grad_vars):
+                grad_wrt.name = f"{var.name}_grad"
+            outputs = [cost] + grads
         else:
-            outputs = self._cost_joined
+            outputs = cost
 
-        inputs = [self._vars_joined]
+        inputs = grad_vars
 
         self._theano_function = theano.function(inputs, outputs, givens=givens, **kwargs)
@@ -717,77 +704,31 @@ def get_extra_values(self):
 
         return {var.name: self._extra_vars_shared[var.name].get_value() for var in self._extra_vars}
 
-    def __call__(self, array, grad_out=None, extra_vars=None):
+    def __call__(self, grad_vars, grad_out=None, extra_vars=None):
         if extra_vars is not None:
             self.set_extra_values(extra_vars)
 
         if not self._extra_are_set:
             raise ValueError("Extra values are not set.")
 
-        if array.shape != (self.size,):
-            raise ValueError(
-                "Invalid shape for array. Must be {} but is {}.".format((self.size,), array.shape)
-            )
+        if isinstance(grad_vars, RaveledVars):
+            grad_vars = self.bij.rmap(grad_vars, as_list=True)
 
-        if grad_out is None:
-            out = np.empty_like(array)
-        else:
-            out = grad_out
+        cost, *grads = self._theano_function(*grad_vars)
+
+        grad = self.bij.map({name: grad for name, grad in zip(self.bij.ordering, grads)})
 
-        output = self._theano_function(array)
         if grad_out is None:
-            return output
+            return cost, grad.data
         else:
-            np.copyto(out, output[1])
-            return output[0]
+            np.copyto(grad_out, grad.data)
+            return cost
 
     @property
     def profile(self):
         """Profiling information of the underlying theano function."""
         return self._theano_function.profile
 
-    def dict_to_array(self, point):
-        """Convert a dictionary with values for grad_vars to an array."""
-        array = np.empty(self.size, dtype=self.dtype)
-        for varmap in self._ordering.vmap:
-            array[varmap.slc] = point[varmap.var].ravel().astype(self.dtype)
-        return array
-
-    def array_to_dict(self, array):
-        """Convert an array to a dictionary containing the grad_vars."""
-        if array.shape != (self.size,):
-            raise ValueError(f"Array should have shape ({self.size},) but has {array.shape}")
-        if array.dtype != self.dtype:
-            raise ValueError(
-                f"Array has invalid dtype. 
Should be {self._dtype} but is {self.dtype}" - ) - point = {} - for varmap in self._ordering.vmap: - data = array[varmap.slc].reshape(varmap.shp) - point[varmap.var] = data.astype(varmap.dtyp) - - return point - - def array_to_full_dict(self, array): - """Convert an array to a dictionary with grad_vars and extra_vars.""" - point = self.array_to_dict(array) - for name, var in self._extra_vars_shared.items(): - point[name] = var.get_value() - return point - - def _build_joined(self, cost, args, vmap): - args_joined = tt.vector("__args_joined") - args_joined.tag.test_value = np.zeros(self.size, dtype=self.dtype) - - joined_slices = {} - for vmap in vmap: - sliced = args_joined[vmap.slc].reshape(vmap.shp) - sliced.name = vmap.var - joined_slices[vmap.var] = sliced - - replace = {var: joined_slices[var.name] for var in args} - return args_joined, theano.clone(cost, replace=replace) - class Model(Factor, WithMemoization, metaclass=ContextMeta): """Encapsulates the variables and likelihood factors of a model. @@ -945,19 +886,6 @@ def root(self): def isroot(self): return self.parent is None - @property # type: ignore - @memoize(bound=True) - def bijection(self): - vars = inputvars(self.vars) - - bij = DictToArrayBijection(ArrayOrdering(vars), self.test_point) - - return bij - - @property - def dict_to_array(self): - return self.bijection.map - @property def size(self): return sum(self.test_point[n.name].size for n in self.free_RVs) @@ -966,17 +894,6 @@ def size(self): def ndim(self): return sum(var.ndim for var in self.free_RVs) - @property - def logp_array(self): - return self.bijection.mapf(self.fastlogp) - - @property - def dlogp_array(self): - logpt = self.logpt - vars = inputvars(logpt) - dlogp = self.fastfn(gradient(self.logpt, vars)) - return self.bijection.mapf(dlogp) - def logp_dlogp_function(self, grad_vars=None, tempered=False, **kwargs): """Compile a theano function that computes logp and gradient. 
@@ -1413,7 +1330,7 @@ def flatten(self, vars=None, order=None, inputvar=None):
         ----------
         vars: list of variables or None
             if None, then all model.free_RVs are used for flattening input
-        order: ArrayOrdering
+        order: list of variable names
             Optional, use predefined ordering
         inputvar: tt.vector
             Optional, use predefined inputvar
@@ -1424,8 +1341,10 @@ def flatten(self, vars=None, order=None, inputvar=None):
         """
         if vars is None:
             vars = self.vars
-        if order is None:
-            order = ArrayOrdering(vars)
+        if order is not None:
+            var_map = {v.name: v for v in vars}
+            vars = [var_map[n] for n in order]
+
         if inputvar is None:
             inputvar = tt.vector("flat_view", dtype=theano.config.floatX)
             if theano.config.compute_test_value != "off":
@@ -1433,12 +1352,19 @@ def flatten(self, vars=None, order=None, inputvar=None):
                 inputvar.tag.test_value = flatten_list(vars).tag.test_value
             else:
                 inputvar.tag.test_value = np.asarray([], inputvar.dtype)
-        replacements = {
-            self.named_vars[name]: inputvar[slc].reshape(shape).astype(dtype)
-            for name, slc, shape, dtype in order.vmap
-        }
+
+        replacements = {}
+        last_idx = 0
+        for var in vars:
+            arr_len = tt.prod(var.shape, dtype="int64")
+            replacements[self.named_vars[var.name]] = (
+                inputvar[last_idx : (last_idx + arr_len)].reshape(var.shape).astype(var.dtype)
+            )
+            last_idx += arr_len
 
-        view = {vm.var: vm for vm in order.vmap}
-        flat_view = FlatView(inputvar, replacements, view)
+        flat_view = FlatView(inputvar, replacements)
 
         return flat_view
 
     def check_test_point(self, test_point=None, round_vals=2):
diff --git a/pymc3/parallel_sampling.py b/pymc3/parallel_sampling.py
index bdfe1a274b7..3b8e7ecab42 100644
--- a/pymc3/parallel_sampling.py
+++ b/pymc3/parallel_sampling.py
@@ -153,6 +153,8 @@ def _wait_for_abortion(self):
                 break
 
     def _make_numpy_refs(self):
+        # TODO: This code needs to determine the shapes and dtypes using a
+        # different approach
         shape_dtypes = self._step_method.vars_shape_dtype
         point = {}
         for name, (shape, dtype) in shape_dtypes.items():
@@ -251,6 +253,8 @@ def __init__(
         self._shared_point = {}
         self._point = {}
 
+        # TODO: This code needs to determine the shapes and dtypes using a
+        # different approach
         for name, (shape, dtype) in step_method.vars_shape_dtype.items():
             size = 1
             for dim in shape:
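The two TODOs above exist because this patch removes `vars_shape_dtype` from
the step methods. One way they could eventually be resolved, sketched here
with a hypothetical helper that is not part of this patch, is to read the
buffer specifications off a `RaveledVars` produced by the step method's
bijection:

    def shape_dtypes_from_raveled(raveled_point):
        # point_map_info already carries (name, shape, dtype) per variable,
        # which is exactly what the shared-memory buffers need here.
        return {name: (shape, dtype) for name, shape, dtype in raveled_point.point_map_info}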
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index e29d2f0b8f7..f0108a0e901 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -39,6 +39,7 @@
 
 from pymc3.backends.base import BaseTrace, MultiTrace
 from pymc3.backends.ndarray import NDArray
+from pymc3.blocking import DictToArrayBijection
 from pymc3.distributions.distribution import draw_values
 from pymc3.distributions.posterior_predictive import fast_sample_posterior_predictive
 from pymc3.exceptions import IncorrectArgumentsError, SamplingError
@@ -2102,16 +2103,21 @@ def init_nuts(
             pm.callbacks.CheckParametersConvergence(tolerance=1e-2, diff="relative"),
         ]
 
+    bij = DictToArrayBijection([v.name for v in inputvars(model.vars)])
+    apoint = bij.map(model.test_point)
+
     if init == "adapt_diag":
         start = [model.test_point] * chains
-        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
+        mean = np.mean([apoint.data] * chains, axis=0)
         var = np.ones_like(mean)
-        potential = quadpotential.QuadPotentialDiagAdapt(model.size, mean, var, 10)
+        n = len(var)
+        potential = quadpotential.QuadPotentialDiagAdapt(n, mean, var, 10)
     elif init == "jitter+adapt_diag":
         start = _init_jitter(model, chains, jitter_max_retries)
-        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
+        mean = np.mean([bij.map(vals).data for vals in start], axis=0)
         var = np.ones_like(mean)
-        potential = quadpotential.QuadPotentialDiagAdapt(model.size, mean, var, 10)
+        n = len(var)
+        potential = quadpotential.QuadPotentialDiagAdapt(n, mean, var, 10)
     elif init == "advi+adapt_diag_grad":
         approx: pm.MeanField = pm.fit(
             random_seed=random_seed,
@@ -2124,12 +2130,12 @@ def init_nuts(
         )
         start = approx.sample(draws=chains)
         start = list(start)
-        stds = approx.bij.rmap(approx.std.eval())
-        cov = model.dict_to_array(stds) ** 2
-        mean = approx.bij.rmap(approx.mean.get_value())
-        mean = model.dict_to_array(mean)
+        std_apoint = approx.std.eval()
+        cov = std_apoint ** 2
+        mean = approx.mean.get_value()
         weight = 50
-        potential = quadpotential.QuadPotentialDiagAdaptGrad(model.size, mean, cov, weight)
+        n = len(cov)
+        potential = quadpotential.QuadPotentialDiagAdaptGrad(n, mean, cov, weight)
     elif init == "advi+adapt_diag":
         approx = pm.fit(
             random_seed=random_seed,
@@ -2142,12 +2148,12 @@ def init_nuts(
         )
         start = approx.sample(draws=chains)
         start = list(start)
-        stds = approx.bij.rmap(approx.std.eval())
-        cov = model.dict_to_array(stds) ** 2
-        mean = approx.bij.rmap(approx.mean.get_value())
-        mean = model.dict_to_array(mean)
+        std_apoint = approx.std.eval()
+        cov = std_apoint ** 2
+        mean = approx.mean.get_value()
         weight = 50
-        potential = quadpotential.QuadPotentialDiagAdapt(model.size, mean, cov, weight)
+        n = len(cov)
+        potential = quadpotential.QuadPotentialDiagAdapt(n, mean, cov, weight)
     elif init == "advi":
         approx = pm.fit(
             random_seed=random_seed,
@@ -2160,8 +2166,7 @@ def init_nuts(
         )
         start = approx.sample(draws=chains)
         start = list(start)
-        stds = approx.bij.rmap(approx.std.eval())
-        cov = model.dict_to_array(stds) ** 2
+        cov = approx.std.eval() ** 2
         potential = quadpotential.QuadPotentialDiag(cov)
     elif init == "advi_map":
         start = pm.find_MAP(include_transformed=True)
@@ -2176,8 +2181,7 @@ def init_nuts(
         )
         start = approx.sample(draws=chains)
         start = list(start)
-        stds = approx.bij.rmap(approx.std.eval())
-        cov = model.dict_to_array(stds) ** 2
+        cov = approx.std.eval() ** 2
         potential = quadpotential.QuadPotentialDiag(cov)
     elif init == "map":
         start = pm.find_MAP(include_transformed=True)
         cov = pm.find_hessian(point=start)
         potential = quadpotential.QuadPotentialFull(cov)
     elif init == "adapt_full":
         start = [model.test_point] * chains
-        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
+        mean = np.mean([apoint.data] * chains, axis=0)
         cov = np.eye(model.size)
         potential = quadpotential.QuadPotentialFullAdapt(model.size, mean, cov, 10)
     elif init == "jitter+adapt_full":
         start = _init_jitter(model, chains, jitter_max_retries)
-        mean = np.mean([model.dict_to_array(vals) for vals in start], axis=0)
+        mean = np.mean([bij.map(vals).data for vals in start], axis=0)
         cov = np.eye(model.size)
         potential = quadpotential.QuadPotentialFullAdapt(model.size, mean, cov, 10)
     else:
diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index 691854bcebe..0969292358e 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -22,6 +22,7 @@
 from theano import function as theano_function
 
 from pymc3.backends.ndarray import NDArray
+from pymc3.blocking import DictToArrayBijection
 from pymc3.model import Point, modelcontext
 from pymc3.sampling import sample_prior_predictive
 from pymc3.theanof import (
@@ -93,6 +94,7 @@ def initialize_population(self):
         init_rnd = self.start
 
         init = self.model.test_point
+        bij = DictToArrayBijection([v.name for v in self.variables])
 
         for v in self.variables:
             var_info[v.name] = (init[v.name].shape, init[v.name].size)
@@ -100,7 +102,7 @@ def initialize_population(self):
 
         for i in range(self.draws):
             point = Point({v.name: init_rnd[v.name][i] for v in self.variables}, model=self.model)
-            population.append(self.model.dict_to_array(point))
+            population.append(bij.map(point).data)
 
         self.posterior = np.array(floatX(population))
         self.var_info = var_info
diff --git a/pymc3/step_methods/arraystep.py b/pymc3/step_methods/arraystep.py
index c3e1cf6f8bb..cda1682b646 100644
--- a/pymc3/step_methods/arraystep.py
+++ b/pymc3/step_methods/arraystep.py
@@ -13,16 +13,15 @@
 #   limitations under the License.
 
 from enum import IntEnum, unique
-from typing import Dict, List
+from typing import Dict, List, Text
 
 import numpy as np
 
 from numpy.random import uniform
 
-from pymc3.blocking import ArrayOrdering, DictToArrayBijection
+from pymc3.blocking import DictToArrayBijection, RaveledVars
 from pymc3.model import PyMC3Variable, modelcontext
 from pymc3.step_methods.compound import CompoundStep
-from pymc3.theanof import inputvars
 from pymc3.util import get_var_name
 
 __all__ = ["ArrayStep", "ArrayStepShared", "metrop_select", "Competence"]
@@ -70,7 +69,7 @@ def __new__(cls, *args, **kwargs):
             vars = model.vars
 
         # get the actual inputs from the vars
-        vars = inputvars(vars)
+        # vars = inputvars(vars)
 
         if len(vars) == 0:
             raise ValueError("No free random variables to sample.")
@@ -115,15 +114,6 @@ def _competence(cls, vars, have_grad):
             competences.append(cls.competence(var))
         return competences
 
-    @property
-    def vars_shape_dtype(self):
-        shape_dtypes = {}
-        for var in self.vars:
-            dtype = np.dtype(var.dtype)
-            shape = var.dshape
-            shape_dtypes[var.name] = (shape, dtype)
-        return shape_dtypes
-
     def stop_tuning(self):
         if hasattr(self, "tune"):
             self.tune = False
@@ -144,24 +134,26 @@ class ArrayStep(BlockedStep):
 
     def __init__(self, vars, fs, allvars=False, blocked=True):
         self.vars = vars
-        self.ordering = ArrayOrdering(vars)
+        self.bij = DictToArrayBijection([v.name for v in vars])
         self.fs = fs
         self.allvars = allvars
         self.blocked = blocked
 
-    def step(self, point):
-        bij = DictToArrayBijection(self.ordering, point)
+    def step(self, point: Dict[Text, np.ndarray]):
 
-        inputs = [bij.mapf(x) for x in self.fs]
+        inputs = [self.bij.mapf(x) for x in self.fs]
+
         if self.allvars:
             inputs.append(point)
 
         if self.generates_stats:
-            apoint, stats = self.astep(bij.map(point), *inputs)
-            return bij.rmap(apoint), stats
+            apoint, stats = self.astep(self.bij.map(point), *inputs)
+            return self.bij.rmap(apoint), stats
         else:
-            apoint = self.astep(bij.map(point), *inputs)
-            return bij.rmap(apoint)
+            apoint = self.astep(self.bij.map(point), *inputs)
+            return self.bij.rmap(apoint)
+
+    def astep(self, apoint, point):
+        raise NotImplementedError()
 
 
 class ArrayStepShared(BlockedStep):
@@ -181,7 +173,6 @@ def __init__(self, vars, shared, blocked=True):
         blocked: Boolean (default True)
         """
         self.vars = vars
-        self.ordering = ArrayOrdering(vars)
+        self.bij = DictToArrayBijection([v.name for v in vars])
         self.shared = {get_var_name(var): shared for var, shared in shared.items()}
         self.blocked = blocked
-        self.bij = None
 
     def step(self, point):
         for var, share in self.shared.items():
             share.set_value(point[var])
 
-        self.bij = DictToArrayBijection(self.ordering, point)
-
         if self.generates_stats:
             apoint, stats = self.astep(self.bij.map(point))
             return self.bij.rmap(apoint), stats
         else:
-            apoint = self.astep(self.bij.map(point))
+            array = self.bij.map(point)
+            apoint = self.astep(array)
+            if not isinstance(apoint, RaveledVars):
+                # We assume that the mapping has stayed the same
+                apoint = RaveledVars(apoint, array.point_map_info)
             return self.bij.rmap(apoint)
 
+    def astep(self, apoint):
+        raise NotImplementedError()
+
 
 class PopulationArrayStepShared(ArrayStepShared):
     """Version of ArrayStepShared that allows samplers to access the states
@@ -255,31 +252,33 @@ def __init__(
         else:
             func = logp_dlogp_func
 
-        # handle edge case discovered in #2948
-        try:
-            func.set_extra_values(model.test_point)
-            q = func.dict_to_array(model.test_point)
-            logp, dlogp = func(q)
-        except ValueError:
-            if logp_dlogp_func is not None:
-                raise
-            theano_kwargs.update(mode="FAST_COMPILE")
-            func = model.logp_dlogp_function(vars, dtype=dtype, **theano_kwargs)
-
         self._logp_dlogp_func = func
 
+        self.bij = DictToArrayBijection([v.name for v in vars])
+
     def step(self, point):
         self._logp_dlogp_func.set_extra_values(point)
-        array = self._logp_dlogp_func.dict_to_array(point)
+
+        array = self.bij.map(point)
+
+        stats = None
         if self.generates_stats:
             apoint, stats = self.astep(array)
-            point = self._logp_dlogp_func.array_to_full_dict(apoint)
-            return point, stats
         else:
             apoint = self.astep(array)
-            point = self._logp_dlogp_func.array_to_full_dict(apoint)
-            return point
+
+        if not isinstance(apoint, RaveledVars):
+            # We assume that the mapping has stayed the same
+            apoint = RaveledVars(apoint, array.point_map_info)
+
+        point = self.bij.rmap(apoint)
+
+        if stats is not None:
+            return point, stats
+        return point
+
+    def astep(self, apoint):
+        raise NotImplementedError()
 
 
 def metrop_select(mr, q, q0):
diff --git a/pymc3/step_methods/compound.py b/pymc3/step_methods/compound.py
index 9e2975ab8bd..a92569bd30e 100644
--- a/pymc3/step_methods/compound.py
+++ b/pymc3/step_methods/compound.py
@@ -71,10 +71,3 @@ def reset_tuning(self):
         for method in self.methods:
             if hasattr(method, "reset_tuning"):
                 method.reset_tuning()
-
-    @property
-    def vars_shape_dtype(self):
-        dtype_shapes = {}
-        for method in self.methods:
-            dtype_shapes.update(method.vars_shape_dtype)
-        return dtype_shapes
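The `astep` stubs added to `arraystep.py` above define the new contract for
step-method subclasses: `astep` receives a `RaveledVars` and may return
either a `RaveledVars` or a bare `ndarray`, in which case the caller re-wraps
it under the assumption that the mapping is unchanged. A toy subclass
illustrating that contract (illustrative only, not part of the patch):

    from pymc3.blocking import RaveledVars
    from pymc3.step_methods.arraystep import ArrayStepShared

    class HalvingStep(ArrayStepShared):
        # Operate on the flat vector, then re-wrap it with the same
        # point_map_info so step() can rebuild the point dict via rmap.
        def astep(self, apoint):
            return RaveledVars(0.5 * apoint.data, apoint.point_map_info)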
" "are inf or -inf, nan or very small:\n{}".format(error_logp.to_string()) @@ -172,7 +177,7 @@ def astep(self, q0): if self._step_rand is not None: step_size = self._step_rand(step_size) - hmc_step = self._hamiltonian_step(start, p0, step_size) + hmc_step = self._hamiltonian_step(start, p0.data, step_size) perf_end = time.perf_counter() process_end = time.process_time() @@ -191,9 +196,11 @@ def astep(self, q0): self._num_divs_sample += 1 # We don't want to fill up all memory with divergence info if self._num_divs_sample < 100 and info.state is not None: - point = self._logp_dlogp_func.array_to_dict(info.state.q) + point = self.bij.rmap(info.state.q) + if self._num_divs_sample < 100 and info.state_div is not None: - point_dest = self._logp_dlogp_func.array_to_dict(info.state_div.q) + point = self.bij.rmap(info.state_div.q) + if self._num_divs_sample < 100: info_store = info warning = SamplerWarning( diff --git a/pymc3/step_methods/hmc/integration.py b/pymc3/step_methods/hmc/integration.py index 0043d6953a9..e1538c3168e 100644 --- a/pymc3/step_methods/hmc/integration.py +++ b/pymc3/step_methods/hmc/integration.py @@ -18,6 +18,8 @@ from scipy import linalg +from pymc3.blocking import RaveledVars + State = namedtuple("State", "q, p, v, q_grad, energy, model_logp") @@ -39,11 +41,13 @@ def __init__(self, potential, logp_dlogp_func): def compute_state(self, q, p): """Compute Hamiltonian functions using a position and momentum.""" - if q.dtype != self._dtype or p.dtype != self._dtype: + if q.data.dtype != self._dtype or p.data.dtype != self._dtype: raise ValueError("Invalid dtype. Must be %s" % self._dtype) + logp, dlogp = self._logp_dlogp_func(q) - v = self._potential.velocity(p) - kinetic = self._potential.energy(p, velocity=v) + + v = self._potential.velocity(p.data) + kinetic = self._potential.energy(p.data, velocity=v) energy = kinetic - logp return State(q, p, v, dlogp, energy, logp) @@ -83,8 +87,8 @@ def _step(self, epsilon, state): axpy = linalg.blas.get_blas_funcs("axpy", dtype=self._dtype) pot = self._potential - q_new = state.q.copy() - p_new = state.p.copy() + q_new = state.q.data.copy() + p_new = state.p.data.copy() v_new = np.empty_like(q_new) q_new_grad = np.empty_like(q_new) @@ -99,12 +103,15 @@ def _step(self, epsilon, state): # q_new = q + epsilon * v_new axpy(v_new, q_new, a=epsilon) - logp = self._logp_dlogp_func(q_new, q_new_grad) + p_new = RaveledVars(p_new, state.p.point_map_info) + q_new = RaveledVars(q_new, state.q.point_map_info) + + logp = self._logp_dlogp_func(q_new, grad_out=q_new_grad) # p_new = p_new + dt * q_new_grad - axpy(q_new_grad, p_new, a=dt) + axpy(q_new_grad, p_new.data, a=dt) - kinetic = pot.velocity_energy(p_new, v_new) + kinetic = pot.velocity_energy(p_new.data, v_new) energy = kinetic - logp return State(q_new, p_new, v_new, q_new_grad, energy, logp) diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index c26bcf91e21..7b0a7f5b2dd 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -250,13 +250,15 @@ def __init__(self, ndim, integrator, start, step_size, Emax): self.start_energy = np.array(start.energy) self.left = self.right = start - self.proposal = Proposal(start.q, start.q_grad, start.energy, 1.0, start.model_logp) + self.proposal = Proposal( + start.q.data, start.q_grad.data, start.energy, 1.0, start.model_logp + ) self.depth = 0 self.log_size = 0 self.log_weighted_accept_sum = -np.inf self.mean_tree_accept = 0.0 self.n_proposals = 0 - self.p_sum = start.p.copy() + self.p_sum = 
diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
index c26bcf91e21..7b0a7f5b2dd 100644
--- a/pymc3/step_methods/hmc/nuts.py
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -250,13 +250,15 @@ def __init__(self, ndim, integrator, start, step_size, Emax):
         self.start_energy = np.array(start.energy)
 
         self.left = self.right = start
-        self.proposal = Proposal(start.q, start.q_grad, start.energy, 1.0, start.model_logp)
+        self.proposal = Proposal(
+            start.q.data, start.q_grad, start.energy, 1.0, start.model_logp
+        )
         self.depth = 0
         self.log_size = 0
         self.log_weighted_accept_sum = -np.inf
         self.mean_tree_accept = 0.0
         self.n_proposals = 0
-        self.p_sum = start.p.copy()
+        self.p_sum = start.p.data.copy()
         self.max_energy_change = 0
 
     def extend(self, direction):
@@ -311,9 +313,9 @@ def extend(self, direction):
         left, right = self.left, self.right
         p_sum = self.p_sum
         turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0)
-        p_sum1 = leftmost_p_sum + rightmost_begin.p
+        p_sum1 = leftmost_p_sum + rightmost_begin.p.data
         turning1 = (p_sum1.dot(leftmost_begin.v) <= 0) or (p_sum1.dot(rightmost_begin.v) <= 0)
-        p_sum2 = leftmost_end.p + rightmost_p_sum
+        p_sum2 = leftmost_end.p.data + rightmost_p_sum
         turning2 = (p_sum2.dot(leftmost_end.v) <= 0) or (p_sum2.dot(rightmost_end.v) <= 0)
 
         turning = turning | turning1 | turning2
@@ -322,6 +324,7 @@ def extend(self, direction):
     def _single_step(self, left, epsilon):
         """Perform a leapfrog step and handle error cases."""
         try:
+            # `right` is a `State` namedtuple
             right = self.integrator.step(epsilon, left)
         except IntegrationError as err:
             error_msg = str(err)
@@ -343,13 +346,15 @@ def _single_step(self, left, epsilon):
             log_p_accept_weighted = -energy_change + min(0.0, -energy_change)
             log_size = -energy_change
             proposal = Proposal(
-                right.q,
+                right.q.data,
                 right.q_grad,
                 right.energy,
                 log_p_accept_weighted,
                 right.model_logp,
             )
-            tree = Subtree(right, right, right.p, proposal, log_size, log_p_accept_weighted, 1)
+            tree = Subtree(
+                right, right, right.p.data, proposal, log_size, log_p_accept_weighted, 1
+            )
             return tree, None, False
         else:
             error_msg = "Energy change in leapfrog step is too large: %s." % energy_change
@@ -375,9 +380,9 @@ def _build_subtree(self, left, depth, epsilon):
         turning = (p_sum.dot(left.v) <= 0) or (p_sum.dot(right.v) <= 0)
         # Additional U turn check only when depth > 1 to avoid redundant work.
         if depth - 1 > 0:
-            p_sum1 = tree1.p_sum + tree2.left.p
+            p_sum1 = tree1.p_sum + tree2.left.p.data
             turning1 = (p_sum1.dot(tree1.left.v) <= 0) or (p_sum1.dot(tree2.left.v) <= 0)
-            p_sum2 = tree1.right.p + tree2.p_sum
+            p_sum2 = tree1.right.p.data + tree2.p_sum
             turning2 = (p_sum2.dot(tree1.right.v) <= 0) or (p_sum2.dot(tree2.right.v) <= 0)
 
             turning = turning | turning1 | turning2
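`raise_ok` in `quadpotential.py` below recovers per-variable indices from the
raveled diagonal by accumulating `np.prod(shape)` offsets over
`point_map_info`. The same pattern in isolation, as a hypothetical helper
mirroring the loop used below:

    import numpy as np

    def zero_diagonal_report(stds, point_map_info):
        msgs, last_idx = [], 0
        for name, shape, _ in point_map_info:
            arr_len = np.prod(shape, dtype=int)
            index = np.where(stds[last_idx : last_idx + arr_len] == 0)[0]
            if index.size > 0:
                msgs.append(f"The derivative of RV `{name}`.ravel()[{index}] is zero.")
            last_idx += arr_len
        return msgs

    report = zero_diagonal_report(
        np.array([1.0, 0.0, 2.0]), (("a", (2,), "float64"), ("b", (1,), "float64"))
    )
    assert report == ["The derivative of RV `a`.ravel()[[1]] is zero."]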
"] - for ii in index: - errmsg.append( - "The derivative of RV `{}`.ravel()[{}] is zero.".format(*name_slc[ii]) - ) + last_idx = 0 + for name, shape, dtype in map_info: + arr_len = np.prod(shape, dtype=int) + index = np.where(self._stds[last_idx : last_idx + arr_len] == 0)[0] + errmsg.append(f"The derivative of RV `{name}`.ravel()[{index}] is zero.") + last_idx += arr_len + raise ValueError("\n".join(errmsg)) if np.any(~np.isfinite(self._stds)): - name_slc = [] - tmp_hold = list(range(self._stds.size)) - for vmap_ in vmap: - slclen = len(tmp_hold[vmap_.slc]) - for i in range(slclen): - name_slc.append((vmap_.var, i)) - index = np.where(~np.isfinite(self._stds))[0] errmsg = ["Mass matrix contains non-finite values on the diagonal. "] - for ii in index: - errmsg.append( - "The derivative of RV `{}`.ravel()[{}] is non-finite.".format(*name_slc[ii]) - ) + + last_idx = 0 + for name, shape, dtype in map_info: + arr_len = np.prod(shape, dtype=int) + index = np.where(~np.isfinite(self._stds[last_idx : last_idx + arr_len]))[0] + errmsg.append(f"The derivative of RV `{name}`.ravel()[{index}] is non-finite.") + last_idx += arr_len raise ValueError("\n".join(errmsg)) diff --git a/pymc3/tests/test_distributions.py b/pymc3/tests/test_distributions.py index 5cbede8e507..7c5350243a8 100644 --- a/pymc3/tests/test_distributions.py +++ b/pymc3/tests/test_distributions.py @@ -23,7 +23,7 @@ import theano import theano.tensor as tt -from numpy import array, exp, inf, log +from numpy import array, inf, log from numpy.testing import assert_allclose, assert_almost_equal, assert_equal from packaging.version import parse from scipy import __version__ as scipy_version @@ -32,7 +32,6 @@ import pymc3 as pm -from pymc3.blocking import DictToVarBijection from pymc3.distributions import ( AR1, AsymmetricLaplace, @@ -732,15 +731,6 @@ def check_selfconsistency_discrete_logcdf( err_msg=str(pt), ) - def check_int_to_1(self, model, value, domain, paramdomains): - pdf = model.fastfn(exp(model.logpt)) - for pt in product(paramdomains, n_samples=10): - pt = Point(pt, value=value.tag.test_value, model=model) - bij = DictToVarBijection(value, (), pt) - pdfx = bij.mapf(pdf) - area = integrate_nd(pdfx, domain, value.dshape, value.dtype) - assert_almost_equal(area, 1, err_msg=str(pt)) - def checkd(self, distfam, valuedomain, vardomains, checks=None, extra_args=None): if checks is None: checks = (self.check_int_to_1,) @@ -2497,7 +2487,6 @@ def test_issue_3051(self, dims, dist_cls, kwargs): actual_a = actual_t.eval() assert isinstance(actual_a, np.ndarray) assert actual_a.shape == (X.shape[0],) - pass def test_serialize_density_dist(): diff --git a/pymc3/tests/test_model.py b/pymc3/tests/test_model.py index 12b93eb6c8e..6c0bfd41e7d 100644 --- a/pymc3/tests/test_model.py +++ b/pymc3/tests/test_model.py @@ -13,7 +13,6 @@ # limitations under the License. 
import pickle -import unittest import numpy as np import numpy.testing as npt @@ -26,7 +25,6 @@ from pymc3 import Deterministic, Potential from pymc3.distributions import HalfCauchy, Normal, transforms -from pymc3.model import ValueGradFunction from pymc3.tests.helpers import select_by_precision @@ -260,113 +258,6 @@ def test_empty_observed(): npt.assert_allclose(b.tag.test_value, np.ones((2, 3)) / 2) -class TestValueGradFunction(unittest.TestCase): - def test_no_extra(self): - a = tt.vector("a") - a.tag.test_value = np.zeros(3, dtype=a.dtype) - a.dshape = (3,) - a.dsize = 3 - f_grad = ValueGradFunction([a.sum()], [a], [], mode="FAST_COMPILE") - assert f_grad.size == 3 - - def test_invalid_type(self): - a = tt.ivector("a") - a.tag.test_value = np.zeros(3, dtype=a.dtype) - a.dshape = (3,) - a.dsize = 3 - with pytest.raises(TypeError) as err: - ValueGradFunction([a.sum()], [a], [], mode="FAST_COMPILE") - err.match("Invalid dtype") - - def setUp(self): - extra1 = tt.iscalar("extra1") - extra1_ = np.array(0, dtype=extra1.dtype) - extra1.tag.test_value = extra1_ - extra1.dshape = tuple() - extra1.dsize = 1 - - val1 = tt.vector("val1") - val1_ = np.zeros(3, dtype=val1.dtype) - val1.tag.test_value = val1_ - val1.dshape = (3,) - val1.dsize = 3 - - val2 = tt.matrix("val2") - val2_ = np.zeros((2, 3), dtype=val2.dtype) - val2.tag.test_value = val2_ - val2.dshape = (2, 3) - val2.dsize = 6 - - self.val1, self.val1_ = val1, val1_ - self.val2, self.val2_ = val2, val2_ - self.extra1, self.extra1_ = extra1, extra1_ - - self.cost = extra1 * val1.sum() + val2.sum() - - self.f_grad = ValueGradFunction([self.cost], [val1, val2], [extra1], mode="FAST_COMPILE") - - def test_extra_not_set(self): - with pytest.raises(ValueError) as err: - self.f_grad.get_extra_values() - err.match("Extra values are not set") - - with pytest.raises(ValueError) as err: - self.f_grad(np.zeros(self.f_grad.size, dtype=self.f_grad.dtype)) - err.match("Extra values are not set") - - def test_grad(self): - self.f_grad.set_extra_values({"extra1": 5}) - array = np.ones(self.f_grad.size, dtype=self.f_grad.dtype) - val, grad = self.f_grad(array) - assert val == 21 - npt.assert_allclose(grad, [5, 5, 5, 1, 1, 1, 1, 1, 1]) - - def test_bij(self): - self.f_grad.set_extra_values({"extra1": 5}) - array = np.ones(self.f_grad.size, dtype=self.f_grad.dtype) - point = self.f_grad.array_to_dict(array) - assert len(point) == 2 - npt.assert_allclose(point["val1"], 1) - npt.assert_allclose(point["val2"], 1) - - array2 = self.f_grad.dict_to_array(point) - npt.assert_allclose(array2, array) - point_ = self.f_grad.array_to_full_dict(array) - assert len(point_) == 3 - assert point_["extra1"] == 5 - - def test_edge_case(self): - # Edge case discovered in #2948 - ndim = 3 - with pm.Model() as m: - pm.Lognormal( - "sigma", mu=np.zeros(ndim), tau=np.ones(ndim), shape=ndim - ) # variance for the correlation matrix - pm.HalfCauchy("nu", beta=10) - step = pm.NUTS() - - func = step._logp_dlogp_func - func.set_extra_values(m.test_point) - q = func.dict_to_array(m.test_point) - logp, dlogp = func(q) - assert logp.size == 1 - assert dlogp.size == 4 - npt.assert_allclose(dlogp, 0.0, atol=1e-5) - - def test_tensor_type_conversion(self): - # case described in #3122 - X = np.random.binomial(1, 0.5, 10) - X[0] = -1 # masked a single value - X = np.ma.masked_values(X, value=-1) - with pm.Model() as m: - x1 = pm.Uniform("x1", 0.0, 1.0) - x2 = pm.Bernoulli("x2", x1, observed=X) - - gf = m.logp_dlogp_function() - - assert m["x2_missing"].type == 
gf._extra_vars_shared["x2_missing"].type
 
 
 def test_multiple_observed_rv():
     "Test previously buggy MultiObservedRV comparison code."
     y1_data = np.random.randn(10)
diff --git a/pymc3/tests/test_model_func.py b/pymc3/tests/test_model_func.py
index d231233406f..c9ab9233bb5 100644
--- a/pymc3/tests/test_model_func.py
+++ b/pymc3/tests/test_model_func.py
@@ -50,19 +50,3 @@ def test_deterministic():
 
     assert model.y == y
     assert model["y"] == y
-
-
-def test_mapping():
-    with pm.Model() as model:
-        mu = pm.Normal("mu", 0, 1)
-        sd = pm.Gamma("sd", 1, 1)
-        y = pm.Normal("y", mu, sd, observed=np.array([0.1, 0.5]))
-    lp = model.fastlogp
-    lparray = model.logp_array
-    point = model.test_point
-    parray = model.bijection.map(point)
-    assert lp(point) == lparray(parray)
-
-    randarray = np.random.randn(*parray.shape)
-    randpoint = model.bijection.rmap(randarray)
-    assert lp(randpoint) == lparray(randarray)
diff --git a/pymc3/theanof.py b/pymc3/theanof.py
index c40311da6e8..f4593beb2a5 100644
--- a/pymc3/theanof.py
+++ b/pymc3/theanof.py
@@ -21,7 +21,6 @@
 from theano.graph.op import Op
 from theano.sandbox.rng_mrg import MRG_RandomStream as RandomStream
 
-from pymc3.blocking import ArrayOrdering
 from pymc3.data import GeneratorAdapter
 from pymc3.vartypes import continuous_types, int_types, typefilter
 
@@ -264,14 +263,16 @@ def join_nonshared_inputs(xs, vars, shared, make_shared=False):
     else:
         inarray = theano.shared(joined.tag.test_value, "inarray")
 
-    ordering = ArrayOrdering(vars)
     inarray.tag.test_value = joined.tag.test_value
 
-    get_var = {var.name: var for var in vars}
-    replace = {
-        get_var[var]: reshape_t(inarray[slc], shp).astype(dtyp)
-        for var, slc, shp, dtyp in ordering.vmap
-    }
+    replace = {}
+    last_idx = 0
+    for var in vars:
+        arr_len = tt.prod(var.shape)
+        replace[var] = reshape_t(inarray[last_idx : last_idx + arr_len], var.shape).astype(
+            var.dtype
+        )
+        last_idx += arr_len
 
     replace.update(shared)
 
diff --git a/pymc3/tuning/scaling.py b/pymc3/tuning/scaling.py
index 49a59ff0d74..f9ec062dbc9 100644
--- a/pymc3/tuning/scaling.py
+++ b/pymc3/tuning/scaling.py
@@ -16,7 +16,7 @@
 
 from numpy import exp, log, sqrt
 
-from pymc3.blocking import ArrayOrdering, DictToArrayBijection
+from pymc3.blocking import DictToArrayBijection
 from pymc3.model import Point, modelcontext
 from pymc3.theanof import hessian_diag, inputvars
 from pymc3.util import get_var_name
@@ -43,7 +43,9 @@ def fixed_hessian(point, vars=None, model=None):
 
     point = Point(point, model=model)
 
-    bij = DictToArrayBijection(ArrayOrdering(vars), point)
+    bij = DictToArrayBijection([v.name for v in vars])
 
-    rval = np.ones(bij.map(point).size) / 10
+    rval = np.ones(bij.map(point).data.size) / 10
     return rval
diff --git a/pymc3/tuning/starting.py b/pymc3/tuning/starting.py
index 86e44c3a54c..7da0cd68c97 100644
--- a/pymc3/tuning/starting.py
+++ b/pymc3/tuning/starting.py
@@ -26,7 +26,7 @@
 
 import pymc3 as pm
 
-from pymc3.blocking import ArrayOrdering, DictToArrayBijection
+from pymc3.blocking import DictToArrayBijection
 from pymc3.model import Point, modelcontext
 from pymc3.theanof import inputvars
 from pymc3.util import (
@@ -104,7 +104,9 @@ def find_MAP(
     check_start_vals(start, model)
 
     start = Point(start, model=model)
-    bij = DictToArrayBijection(ArrayOrdering(vars), start)
+
+    bij = DictToArrayBijection([v.name for v in vars])
+
     logp_func = bij.mapf(model.fastlogp_nojac)
     x0 = 
bij.map(start)
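
For reference, an end-to-end sketch of the mapping pattern that `find_MAP`
now follows, written against the patched `pymc3.blocking`; the quadratic
objective is a stand-in for a model's actual negative log posterior:

    import numpy as np
    from scipy import optimize

    from pymc3.blocking import DictToArrayBijection, RaveledVars

    start = {"mu": np.zeros(2), "sigma_log__": np.zeros(3)}
    bij = DictToArrayBijection(["mu", "sigma_log__"])
    x0 = bij.map(start)

    def neg_logp(x):
        # Unravel the flat optimizer state back into a point dict.
        point = bij.rmap(RaveledVars(x, x0.point_map_info))
        return sum(np.sum(v ** 2) for v in point.values())

    res = optimize.minimize(neg_logp, x0.data)
    map_point = bij.rmap(RaveledVars(res.x, x0.point_map_info))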