Remove shape dependencies from DictToArrayBijection

This commit changes `DictToArrayBijection` so that it returns a `RaveledVars` datatype that contains the original raveled and concatenated vector along with the information needed to revert it back to dictionay/variables form. Simply put, the variables-to-single-vector mapping steps have been pushed away from the model object and its symbolic terms and closer to the (sampling) processes that produce and work with `ndarray` values for said terms. In doing so, we can operate under fewer unnecessarily strong assumptions (e.g. that the shapes of each term are static and equal to the initial test points), and let the sampling processes that require vector-only steps deal with any changes in the mappings.
pymc-devs · Feb 5, 2021 · c377b13 · c377b13
1 parent 33b240d
commit c377b13
Show file tree

Hide file tree

Showing 20 changed files with 268 additions and 497 deletions.
diff --git a/pymc3/blocking.py b/pymc3/blocking.py
@@ -18,21 +18,20 @@
 Classes for working with subsets of parameters.
 """
 import collections
-import copy
 
-import numpy as np
+from typing import Dict, List, Optional, Union
 
-from pymc3.util import get_var_name
+import numpy as np
 
-__all__ = ["ArrayOrdering", "DictToArrayBijection", "DictToVarBijection"]
+__all__ = ["ArrayOrdering", "DictToArrayBijection"]
 
+# `point_map_info` is a tuple of tuples containing `(name, shape, dtype)` for
+# each of the raveled variables.
+RaveledVars = collections.namedtuple("RaveledVars", "data, point_map_info")
 VarMap = collections.namedtuple("VarMap", "var, slc, shp, dtyp")
 DataMap = collections.namedtuple("DataMap", "list_ind, slc, shp, dtype, name")
 
 
-# TODO Classes and methods need to be fully documented.
-
-
 class ArrayOrdering:
     """
     An ordering for an array space
@@ -63,200 +62,67 @@ def __getitem__(self, key):
 
 
 class DictToArrayBijection:
-    """
-    A mapping between a dict space and an array space
-    """
-
-    def __init__(self, ordering, dpoint):
-        self.ordering = ordering
-        self.dpt = dpoint
+    """Map between a `dict`s of variables to an array space.
 
-        # determine smallest float dtype that will fit all data
-        if all([x.dtyp == "float16" for x in ordering.vmap]):
-            self.array_dtype = "float16"
-        elif all([x.dtyp == "float32" for x in ordering.vmap]):
-            self.array_dtype = "float32"
-        else:
-            self.array_dtype = "float64"
+    Said array space consists of all the vars raveled and then concatenated.
 
-    def map(self, dpt):
-        """
-        Maps value from dict space to array space
+    """
 
-        Parameters
-        ----------
-        dpt: dict
-        """
-        apt = np.empty(self.ordering.size, dtype=self.array_dtype)
-        for var, slc, _, _ in self.ordering.vmap:
-            apt[slc] = dpt[var].ravel()
-        return apt
+    @staticmethod
+    def map(var_dict: Dict[str, np.ndarray]) -> RaveledVars:
+        """Map a dictionary of names and variables to a concatenated 1D array space."""
+        vars_info = tuple((v, k, v.shape, v.dtype) for k, v in var_dict.items())
+        res = np.concatenate([v[0].ravel() for v in vars_info])
+        return RaveledVars(res, tuple(v[1:] for v in vars_info))
 
-    def rmap(self, apt):
-        """
-        Maps value from array space to dict space
+    @staticmethod
+    def rmap(
+        array: RaveledVars, as_list: Optional[bool] = False
+    ) -> Union[Dict[str, np.ndarray], List[np.ndarray]]:
+        """Map 1D concatenated array to a dictionary of variables in their original spaces.
 
         Parameters
-        ----------
-        apt: array
+        ==========
+        array
+            The array to map.
+        as_list
+            When ``True``, return a list of the original variables instead of a
+            ``dict`` keyed each variable's name.
         """
-        dpt = self.dpt.copy()
+        if as_list:
+            res = []
+        else:
+            res = {}
+
+        if not isinstance(array, RaveledVars):
+            raise TypeError("`apt` must be a `RaveledVars` type")
 
-        for var, slc, shp, dtyp in self.ordering.vmap:
-            dpt[var] = np.atleast_1d(apt)[slc].reshape(shp).astype(dtyp)
+        last_idx = 0
+        for name, shape, dtype in array.point_map_info:
+            arr_len = np.prod(shape, dtype=int)
+            var = array.data[last_idx : last_idx + arr_len].reshape(shape).astype(dtype)
+            if as_list:
+                res.append(var)
+            else:
+                res[name] = var
+            last_idx += arr_len
 
-        return dpt
+        return res
 
-    def mapf(self, f):
+    @classmethod
+    def mapf(cls, f):
         """
          function f: DictSpace -> T to ArraySpace -> T
 
         Parameters
         ----------
-
         f: dict -> T
 
         Returns
         -------
         f: array -> T
         """
-        return Compose(f, self.rmap)
-
-
-class ListArrayOrdering:
-    """
-    An ordering for a list to an array space. Takes also non theano.tensors.
-    Modified from pymc3 blocking.
-
-    Parameters
-    ----------
-    list_arrays: list
-        :class:`numpy.ndarray` or :class:`theano.tensor.Tensor`
-    intype: str
-        defining the input type 'tensor' or 'numpy'
-    """
-
-    def __init__(self, list_arrays, intype="numpy"):
-        if intype not in {"tensor", "numpy"}:
-            raise ValueError("intype not in {'tensor', 'numpy'}")
-        self.vmap = []
-        self.intype = intype
-        self.size = 0
-        for array in list_arrays:
-            if self.intype == "tensor":
-                name = array.name
-                array = array.tag.test_value
-            else:
-                name = "numpy"
-
-            slc = slice(self.size, self.size + array.size)
-            self.vmap.append(DataMap(len(self.vmap), slc, array.shape, array.dtype, name))
-            self.size += array.size
-
-
-class ListToArrayBijection:
-    """
-    A mapping between a List of arrays and an array space
-
-    Parameters
-    ----------
-    ordering: :class:`ListArrayOrdering`
-    list_arrays: list
-        of :class:`numpy.ndarray`
-    """
-
-    def __init__(self, ordering, list_arrays):
-        self.ordering = ordering
-        self.list_arrays = list_arrays
-
-    def fmap(self, list_arrays):
-        """
-        Maps values from List space to array space
-
-        Parameters
-        ----------
-        list_arrays: list
-            of :class:`numpy.ndarray`
-
-        Returns
-        -------
-        array: :class:`numpy.ndarray`
-            single array comprising all the input arrays
-        """
-
-        array = np.empty(self.ordering.size)
-        for list_ind, slc, _, _, _ in self.ordering.vmap:
-            array[slc] = list_arrays[list_ind].ravel()
-        return array
-
-    def dmap(self, dpt):
-        """
-        Maps values from dict space to List space
-
-        Parameters
-        ----------
-        list_arrays: list
-            of :class:`numpy.ndarray`
-
-        Returns
-        -------
-        point
-        """
-        a_list = copy.copy(self.list_arrays)
-
-        for list_ind, _, _, _, var in self.ordering.vmap:
-            a_list[list_ind] = dpt[var].ravel()
-
-        return a_list
-
-    def rmap(self, array):
-        """
-        Maps value from array space to List space
-        Inverse operation of fmap.
-
-        Parameters
-        ----------
-        array: :class:`numpy.ndarray`
-
-        Returns
-        -------
-        a_list: list
-            of :class:`numpy.ndarray`
-        """
-
-        a_list = copy.copy(self.list_arrays)
-
-        for list_ind, slc, shp, dtype, _ in self.ordering.vmap:
-            a_list[list_ind] = np.atleast_1d(array)[slc].reshape(shp).astype(dtype)
-
-        return a_list
-
-
-class DictToVarBijection:
-    """
-    A mapping between a dict space and the array space for one element within the dict space
-    """
-
-    def __init__(self, var, idx, dpoint):
-        self.var = get_var_name(var)
-        self.idx = idx
-        self.dpt = dpoint
-
-    def map(self, dpt):
-        return dpt[self.var][self.idx]
-
-    def rmap(self, apt):
-        dpt = self.dpt.copy()
-
-        dvar = dpt[self.var].copy()
-        dvar[self.idx] = apt
-
-        dpt[self.var] = dvar
-
-        return dpt
-
-    def mapf(self, f):
-        return Compose(f, self.rmap)
+        return Compose(f, cls.rmap)
 
 
 class Compose:

diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -1341,7 +1341,7 @@ def dist(cls, p, **kwargs):
 
 
 @_logp.register(CategoricalRV)
-def categorical_logp(op, value, p_, upper):
+def categorical_logp(op, value, p, upper):
     r"""
     Calculate log-probability of Categorical distribution at specified value.
 
@@ -1355,19 +1355,17 @@ def categorical_logp(op, value, p_, upper):
     -------
     TensorVariable
     """
-    p_ = self.p
-    k = self.k
 
     # Clip values before using them for indexing
-    value_clip = tt.clip(value, 0, k - 1)
+    value_clip = tt.clip(value, 0, p.size - 1)
 
-    p = p_ / tt.sum(p_, axis=-1, keepdims=True)
+    p = p / tt.sum(p, axis=-1, keepdims=True)
 
     if p.ndim > 1:
         if p.ndim > value_clip.ndim:
-            value_clip = tt.shape_padleft(value_clip, p_.ndim - value_clip.ndim)
+            value_clip = tt.shape_padleft(value_clip, p.ndim - value_clip.ndim)
         elif p.ndim < value_clip.ndim:
-            p = tt.shape_padleft(p, value_clip.ndim - p_.ndim)
+            p = tt.shape_padleft(p, value_clip.ndim - p.ndim)
         pattern = (p.ndim - 1,) + tuple(range(p.ndim - 1))
         a = tt.log(
             take_along_axis(
@@ -1378,7 +1376,9 @@ def categorical_logp(op, value, p_, upper):
     else:
         a = tt.log(p[value_clip])
 
-    return bound(a, value >= 0, value <= (k - 1), tt.all(p_ >= 0, axis=-1), tt.all(p <= 1, axis=-1))
+    return bound(
+        a, value >= 0, value <= (p.size - 1), tt.all(p >= 0, axis=-1), tt.all(p <= 1, axis=-1)
+    )
 
 
 class Constant(Discrete):