Skip to content

Commit

Permalink
Remove shape dependencies from DictToArrayBijection
Browse files Browse the repository at this point in the history
This commit changes `DictToArrayBijection` so that it returns a `RaveledVars`
datatype that contains the original raveled and concatenated vector along with
the information needed to revert it back to dictionay/variables form.

Simply put, the variables-to-single-vector mapping steps have been pushed away
from the model object and its symbolic terms and closer to the (sampling)
processes that produce and work with `ndarray` values for said terms.  In doing
so, we can operate under fewer unnecessarily strong assumptions (e.g. that the
shapes of each term are static and equal to the initial test points), and let
the sampling processes that require vector-only steps deal with any changes in
the mappings.
  • Loading branch information
brandonwillard committed Feb 2, 2021
1 parent 3d0a814 commit 1f716ba
Show file tree
Hide file tree
Showing 18 changed files with 229 additions and 543 deletions.
193 changes: 32 additions & 161 deletions pymc3/blocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
Classes for working with subsets of parameters.
"""
import collections
import copy

import numpy as np
from typing import Dict, List, Text, Union

from pymc3.util import get_var_name
import numpy as np

__all__ = ["ArrayOrdering", "DictToArrayBijection", "DictToVarBijection"]
__all__ = ["ArrayOrdering", "DictToArrayBijection"]

RaveledVars = collections.namedtuple("RaveledVars", "data, point_map_info")
VarMap = collections.namedtuple("VarMap", "var, slc, shp, dtyp")
DataMap = collections.namedtuple("DataMap", "list_ind, slc, shp, dtype, name")

Expand Down Expand Up @@ -67,45 +67,52 @@ class DictToArrayBijection:
A mapping between a dict space and an array space
"""

def __init__(self, ordering, dpoint):
def __init__(self, ordering: List[Text]):
# TODO: Should just use `OrderedDict`s and remove this state entirely
self.ordering = ordering
self.dpt = dpoint

# determine smallest float dtype that will fit all data
if all([x.dtyp == "float16" for x in ordering.vmap]):
self.array_dtype = "float16"
elif all([x.dtyp == "float32" for x in ordering.vmap]):
self.array_dtype = "float32"
else:
self.array_dtype = "float64"

def map(self, dpt):
def map(self, dpt: Dict[Text, np.ndarray]):
"""
Maps value from dict space to array space
Parameters
----------
dpt: dict
"""
apt = np.empty(self.ordering.size, dtype=self.array_dtype)
for var, slc, _, _ in self.ordering.vmap:
apt[slc] = dpt[var].ravel()
return apt

def rmap(self, apt):
vars_info = tuple((dpt[n], n, dpt[n].shape, dpt[n].dtype) for n in self.ordering)
res = np.concatenate([v[0].ravel() for v in vars_info])
return RaveledVars(res, tuple(v[1:] for v in vars_info))

@classmethod
def rmap(
cls, apt: RaveledVars, as_list=False
) -> Union[Dict[Text, np.ndarray], List[np.ndarray]]:
"""
Maps value from array space to dict space
Parameters
----------
apt: array
"""
dpt = self.dpt.copy()
if as_list:
res = []
else:
res = {}

if not isinstance(apt, RaveledVars):
raise TypeError("`apt` must be a `RaveledVars` type")

for var, slc, shp, dtyp in self.ordering.vmap:
dpt[var] = np.atleast_1d(apt)[slc].reshape(shp).astype(dtyp)
last_idx = 0
for name, shape, dtype in apt.point_map_info:
arr_len = np.prod(shape, dtype=int)
var = apt.data[last_idx : last_idx + arr_len].reshape(shape).astype(dtype)
if as_list:
res.append(var)
else:
res[name] = var
last_idx += arr_len

return dpt
return res

def mapf(self, f):
"""
Expand All @@ -123,142 +130,6 @@ def mapf(self, f):
return Compose(f, self.rmap)


class ListArrayOrdering:
"""
An ordering for a list to an array space. Takes also non theano.tensors.
Modified from pymc3 blocking.
Parameters
----------
list_arrays: list
:class:`numpy.ndarray` or :class:`theano.tensor.Tensor`
intype: str
defining the input type 'tensor' or 'numpy'
"""

def __init__(self, list_arrays, intype="numpy"):
if intype not in {"tensor", "numpy"}:
raise ValueError("intype not in {'tensor', 'numpy'}")
self.vmap = []
self.intype = intype
self.size = 0
for array in list_arrays:
if self.intype == "tensor":
name = array.name
array = array.tag.test_value
else:
name = "numpy"

slc = slice(self.size, self.size + array.size)
self.vmap.append(DataMap(len(self.vmap), slc, array.shape, array.dtype, name))
self.size += array.size


class ListToArrayBijection:
"""
A mapping between a List of arrays and an array space
Parameters
----------
ordering: :class:`ListArrayOrdering`
list_arrays: list
of :class:`numpy.ndarray`
"""

def __init__(self, ordering, list_arrays):
self.ordering = ordering
self.list_arrays = list_arrays

def fmap(self, list_arrays):
"""
Maps values from List space to array space
Parameters
----------
list_arrays: list
of :class:`numpy.ndarray`
Returns
-------
array: :class:`numpy.ndarray`
single array comprising all the input arrays
"""

array = np.empty(self.ordering.size)
for list_ind, slc, _, _, _ in self.ordering.vmap:
array[slc] = list_arrays[list_ind].ravel()
return array

def dmap(self, dpt):
"""
Maps values from dict space to List space
Parameters
----------
list_arrays: list
of :class:`numpy.ndarray`
Returns
-------
point
"""
a_list = copy.copy(self.list_arrays)

for list_ind, _, _, _, var in self.ordering.vmap:
a_list[list_ind] = dpt[var].ravel()

return a_list

def rmap(self, array):
"""
Maps value from array space to List space
Inverse operation of fmap.
Parameters
----------
array: :class:`numpy.ndarray`
Returns
-------
a_list: list
of :class:`numpy.ndarray`
"""

a_list = copy.copy(self.list_arrays)

for list_ind, slc, shp, dtype, _ in self.ordering.vmap:
a_list[list_ind] = np.atleast_1d(array)[slc].reshape(shp).astype(dtype)

return a_list


class DictToVarBijection:
"""
A mapping between a dict space and the array space for one element within the dict space
"""

def __init__(self, var, idx, dpoint):
self.var = get_var_name(var)
self.idx = idx
self.dpt = dpoint

def map(self, dpt):
return dpt[self.var][self.idx]

def rmap(self, apt):
dpt = self.dpt.copy()

dvar = dpt[self.var].copy()
dvar[self.idx] = apt

dpt[self.var] = dvar

return dpt

def mapf(self, f):
return Compose(f, self.rmap)


class Compose:
"""
Compose two functions in a pickleable way
Expand Down
16 changes: 8 additions & 8 deletions pymc3/distributions/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,7 +1357,7 @@ def dist(cls, p, **kwargs):


@_logp.register(CategoricalRV)
def categorical_logp(op, value, p_, upper):
def categorical_logp(op, value, p, upper):
r"""
Calculate log-probability of Categorical distribution at specified value.
Expand All @@ -1371,19 +1371,17 @@ def categorical_logp(op, value, p_, upper):
-------
TensorVariable
"""
p_ = self.p
k = self.k

# Clip values before using them for indexing
value_clip = tt.clip(value, 0, k - 1)
value_clip = tt.clip(value, 0, p.size - 1)

p = p_ / tt.sum(p_, axis=-1, keepdims=True)
p = p / tt.sum(p, axis=-1, keepdims=True)

if p.ndim > 1:
if p.ndim > value_clip.ndim:
value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim)
value_clip = tt.shape_padleft(value_clip, p.ndim - value_clip.ndim)
elif p.ndim < value_clip.ndim:
p = tt.shape_padleft(p, value_clip.ndim - p_.ndim)
p = tt.shape_padleft(p, value_clip.ndim - p.ndim)
pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
a = tt.log(
take_along_axis(
Expand All @@ -1394,7 +1392,9 @@ def categorical_logp(op, value, p_, upper):
else:
a = tt.log(p[value_clip])

return bound(a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
return bound(
a, value >= 0, value <= (p.size - 1), tt.all(p >= 0, axis=-1), tt.all(p <= 1, axis=-1)
)


class Constant(Discrete):
Expand Down
Loading

0 comments on commit 1f716ba

Please sign in to comment.