Skip to content

Commit

Permalink
Remove shape dependencies from DictToArrayBijection
Browse files Browse the repository at this point in the history
This commit changes `DictToArrayBijection` so that it returns a `RaveledVars`
datatype that contains the original raveled and concatenated vector along with
the information needed to revert it back to dictionay/variables form.

Simply put, the variables-to-single-vector mapping steps have been pushed away
from the model object and its symbolic terms and closer to the (sampling)
processes that produce and work with `ndarray` values for said terms.  In doing
so, we can operate under fewer unnecessarily strong assumptions (e.g. that the
shapes of each term are static and equal to the initial test points), and let
the sampling processes that require vector-only steps deal with any changes in
the mappings.
  • Loading branch information
brandonwillard committed Feb 5, 2021
1 parent 33b240d commit c377b13
Show file tree
Hide file tree
Showing 20 changed files with 268 additions and 497 deletions.
226 changes: 46 additions & 180 deletions pymc3/blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,21 +18,20 @@
Classes for working with subsets of parameters.
"""
import collections
import copy

import numpy as np
from typing import Dict, List, Optional, Union

from pymc3.util import get_var_name
import numpy as np

__all__ = ["ArrayOrdering", "DictToArrayBijection", "DictToVarBijection"]
__all__ = ["ArrayOrdering", "DictToArrayBijection"]

# `point_map_info` is a tuple of tuples containing `(name, shape, dtype)` for
# each of the raveled variables.
RaveledVars = collections.namedtuple("RaveledVars", "data, point_map_info")
VarMap = collections.namedtuple("VarMap", "var, slc, shp, dtyp")
DataMap = collections.namedtuple("DataMap", "list_ind, slc, shp, dtype, name")


# TODO Classes and methods need to be fully documented.


class ArrayOrdering:
"""
An ordering for an array space
Expand Down Expand Up @@ -63,200 +62,67 @@ def __getitem__(self, key):


class DictToArrayBijection:
"""
A mapping between a dict space and an array space
"""

def __init__(self, ordering, dpoint):
self.ordering = ordering
self.dpt = dpoint
"""Map between a `dict`s of variables to an array space.
# determine smallest float dtype that will fit all data
if all([x.dtyp == "float16" for x in ordering.vmap]):
self.array_dtype = "float16"
elif all([x.dtyp == "float32" for x in ordering.vmap]):
self.array_dtype = "float32"
else:
self.array_dtype = "float64"
Said array space consists of all the vars raveled and then concatenated.
def map(self, dpt):
"""
Maps value from dict space to array space
"""

Parameters
----------
dpt: dict
"""
apt = np.empty(self.ordering.size, dtype=self.array_dtype)
for var, slc, _, _ in self.ordering.vmap:
apt[slc] = dpt[var].ravel()
return apt
@staticmethod
def map(var_dict: Dict[str, np.ndarray]) -> RaveledVars:
"""Map a dictionary of names and variables to a concatenated 1D array space."""
vars_info = tuple((v, k, v.shape, v.dtype) for k, v in var_dict.items())
res = np.concatenate([v[0].ravel() for v in vars_info])
return RaveledVars(res, tuple(v[1:] for v in vars_info))

def rmap(self, apt):
"""
Maps value from array space to dict space
@staticmethod
def rmap(
array: RaveledVars, as_list: Optional[bool] = False
) -> Union[Dict[str, np.ndarray], List[np.ndarray]]:
"""Map 1D concatenated array to a dictionary of variables in their original spaces.
Parameters
----------
apt: array
==========
array
The array to map.
as_list
When ``True``, return a list of the original variables instead of a
``dict`` keyed each variable's name.
"""
dpt = self.dpt.copy()
if as_list:
res = []
else:
res = {}

if not isinstance(array, RaveledVars):
raise TypeError("`apt` must be a `RaveledVars` type")

for var, slc, shp, dtyp in self.ordering.vmap:
dpt[var] = np.atleast_1d(apt)[slc].reshape(shp).astype(dtyp)
last_idx = 0
for name, shape, dtype in array.point_map_info:
arr_len = np.prod(shape, dtype=int)
var = array.data[last_idx : last_idx + arr_len].reshape(shape).astype(dtype)
if as_list:
res.append(var)
else:
res[name] = var
last_idx += arr_len

return dpt
return res

def mapf(self, f):
@classmethod
def mapf(cls, f):
"""
function f: DictSpace -> T to ArraySpace -> T
Parameters
----------
f: dict -> T
Returns
-------
f: array -> T
"""
return Compose(f, self.rmap)


class ListArrayOrdering:
"""
An ordering for a list to an array space. Takes also non theano.tensors.
Modified from pymc3 blocking.
Parameters
----------
list_arrays: list
:class:`numpy.ndarray` or :class:`theano.tensor.Tensor`
intype: str
defining the input type 'tensor' or 'numpy'
"""

def __init__(self, list_arrays, intype="numpy"):
if intype not in {"tensor", "numpy"}:
raise ValueError("intype not in {'tensor', 'numpy'}")
self.vmap = []
self.intype = intype
self.size = 0
for array in list_arrays:
if self.intype == "tensor":
name = array.name
array = array.tag.test_value
else:
name = "numpy"

slc = slice(self.size, self.size + array.size)
self.vmap.append(DataMap(len(self.vmap), slc, array.shape, array.dtype, name))
self.size += array.size


class ListToArrayBijection:
"""
A mapping between a List of arrays and an array space
Parameters
----------
ordering: :class:`ListArrayOrdering`
list_arrays: list
of :class:`numpy.ndarray`
"""

def __init__(self, ordering, list_arrays):
self.ordering = ordering
self.list_arrays = list_arrays

def fmap(self, list_arrays):
"""
Maps values from List space to array space
Parameters
----------
list_arrays: list
of :class:`numpy.ndarray`
Returns
-------
array: :class:`numpy.ndarray`
single array comprising all the input arrays
"""

array = np.empty(self.ordering.size)
for list_ind, slc, _, _, _ in self.ordering.vmap:
array[slc] = list_arrays[list_ind].ravel()
return array

def dmap(self, dpt):
"""
Maps values from dict space to List space
Parameters
----------
list_arrays: list
of :class:`numpy.ndarray`
Returns
-------
point
"""
a_list = copy.copy(self.list_arrays)

for list_ind, _, _, _, var in self.ordering.vmap:
a_list[list_ind] = dpt[var].ravel()

return a_list

def rmap(self, array):
"""
Maps value from array space to List space
Inverse operation of fmap.
Parameters
----------
array: :class:`numpy.ndarray`
Returns
-------
a_list: list
of :class:`numpy.ndarray`
"""

a_list = copy.copy(self.list_arrays)

for list_ind, slc, shp, dtype, _ in self.ordering.vmap:
a_list[list_ind] = np.atleast_1d(array)[slc].reshape(shp).astype(dtype)

return a_list


class DictToVarBijection:
"""
A mapping between a dict space and the array space for one element within the dict space
"""

def __init__(self, var, idx, dpoint):
self.var = get_var_name(var)
self.idx = idx
self.dpt = dpoint

def map(self, dpt):
return dpt[self.var][self.idx]

def rmap(self, apt):
dpt = self.dpt.copy()

dvar = dpt[self.var].copy()
dvar[self.idx] = apt

dpt[self.var] = dvar

return dpt

def mapf(self, f):
return Compose(f, self.rmap)
return Compose(f, cls.rmap)


class Compose:
Expand Down
16 changes: 8 additions & 8 deletions pymc3/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -1341,7 +1341,7 @@ def dist(cls, p, **kwargs):


@_logp.register(CategoricalRV)
def categorical_logp(op, value, p_, upper):
def categorical_logp(op, value, p, upper):
r"""
Calculate log-probability of Categorical distribution at specified value.
Expand All @@ -1355,19 +1355,17 @@ def categorical_logp(op, value, p_, upper):
-------
TensorVariable
"""
p_ = self.p
k = self.k

# Clip values before using them for indexing
value_clip = tt.clip(value, 0, k - 1)
value_clip = tt.clip(value, 0, p.size - 1)

p = p_ / tt.sum(p_, axis=-1, keepdims=True)
p = p / tt.sum(p, axis=-1, keepdims=True)

if p.ndim > 1:
if p.ndim > value_clip.ndim:
value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim)
value_clip = tt.shape_padleft(value_clip, p.ndim - value_clip.ndim)
elif p.ndim < value_clip.ndim:
p = tt.shape_padleft(p, value_clip.ndim - p_.ndim)
p = tt.shape_padleft(p, value_clip.ndim - p.ndim)
pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
a = tt.log(
take_along_axis(
Expand All @@ -1378,7 +1376,9 @@ def categorical_logp(op, value, p_, upper):
else:
a = tt.log(p[value_clip])

return bound(a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
return bound(
a, value >= 0, value <= (p.size - 1), tt.all(p >= 0, axis=-1), tt.all(p <= 1, axis=-1)
)


class Constant(Discrete):
Expand Down
Loading

0 comments on commit c377b13

Please sign in to comment.