Skip to content
This repository has been archived by the owner on Dec 3, 2024. It is now read-only.

reorganization #54

Merged
merged 1 commit into from
Oct 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mederrata_spmf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from .spmf import PoissonMatrixFactorization
from .poissonautoencoder import PoissonAutoencoder
from .poisson import PoissonFactorization
from .poisson import PoissonAutoencoder
2 changes: 2 additions & 0 deletions mederrata_spmf/gaussian.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env python3

136 changes: 127 additions & 9 deletions mederrata_spmf/spmf.py → mederrata_spmf/poisson.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@

from bayesianquilts.model import BayesianModel
from bayesianquilts.distributions import SqrtInverseGamma, AbsHorseshoe
from bayesianquilts.nn.dense import DenseHorseshoe

from bayesianquilts.util import (
build_trainable_InverseGamma_dist, build_trainable_normal_dist,
run_chain, clip_gradients, build_surrogate_posterior,
Expand All @@ -23,7 +25,7 @@
tfb = tfp.bijectors


class PoissonMatrixFactorization(BayesianModel):
class PoissonFactorization(BayesianModel):
"""Sparse (horseshoe) poisson matrix factorization
Arguments:
object {[type]} -- [description]
Expand Down Expand Up @@ -79,12 +81,13 @@ def __init__(
strategy {[type]} -- For multi-GPU (default: {None})
decoder_function {[type]} -- f(x) (default: {None})
encoder_function {[type]} -- g(x) (default: {None})
scale_columns {bool} -- Scale the rates by the mean of the first batch (default: {True})
scale_columns {bool} -- Scale the rates by the mean of the
first batch (default: {True})
scale_row {bool} -- Scale by normalized row sums (default: {True})
dtype {[type]} -- [description] (default: {tf.float64})
"""

super(PoissonMatrixFactorization, self).__init__(
super(PoissonFactorization, self).__init__(
data=None, data_transform_fn=None, strategy=strategy, dtype=dtype)

self.scale_rows = scale_rows
Expand All @@ -99,7 +102,7 @@ def __init__(
self.set_data(
data, data_transform_fn,
compute_normalization=(column_norms is None)
)
)
if encoder_function is not None:
self.encoder_function = encoder_function
if decoder_function is not None:
Expand All @@ -119,8 +122,9 @@ def __init__(
print(
f"Feature dim: {self.feature_dim} -> Latent dim {self.latent_dim}")

def set_data(self, data, data_transform_fn=None, compute_normalization=True):
super(PoissonMatrixFactorization, self).set_data(
def set_data(
self, data, data_transform_fn=None, compute_normalization=True):
super(PoissonFactorization, self).set_data(
data, data_transform_fn)
if self.scale_columns and compute_normalization:
print("Looping through the entire dataset once to get some stats")
Expand Down Expand Up @@ -154,9 +158,9 @@ def set_data(self, data, data_transform_fn=None, compute_normalization=True):
colmeans_nonzero > 1,
colmeans_nonzero,
tf.ones_like(colmeans_nonzero)
),
),
self.dtype
)
)

if self.scale_rows:
self.xi_u_global = tf.cast(rowmean_nonzero, self.dtype)
Expand Down Expand Up @@ -644,7 +648,9 @@ def encode(self, x, u=None, s=None):
try:
tf.debugging.check_numerics(encoding, message='Checking encoding')
except Exception as e:
assert "Checking encoding : Tensor had NaN values" in encoding.message
assert (
"Checking encoding : Tensor had NaN values"
in encoding.message)
z = tf.matmul(
self.encoder_function(
tf.cast(x, self.dtype)
Expand Down Expand Up @@ -723,3 +729,115 @@ def reconstitute(self, state):
self.surrogate_distribution.trainable_variables[j].assign(
tf.cast(value, self.dtype))
# self.set_calibration_expectations()


class PoissonAutoencoder(PoissonFactorization):
    """Dense Poisson autoencoder backed by a ``DenseHorseshoe`` network.

    The network maps column-normalized counts to log-rates; the rates are
    rescaled by the column normalization before being used as the rate of
    a Poisson likelihood.
    """
    var_list = []

    def __init__(
            self, data, data_transform_fn=None, latent_dim=None,
            scale_columns=True, column_norms=None, encoder_layers=1,
            decoder_layers=1, activation_function=tf.nn.softplus,
            strategy=None, dtype=tf.float64, **kwargs):
        """Instantiate unconstrained dense Poisson autoencoder

        Args:
            data: Iterable of batches; each batch is indexed with the
                key ``'data'`` to obtain the count matrix.
            data_transform_fn (optional): Forwarded to the base model.
                Defaults to None.
            latent_dim (int, optional): Width of the bottleneck layer.
                Defaults to the feature dimension of the first batch.
            scale_columns (bool, optional): Divide the network input by
                (and multiply the rates by) a per-column factor.
                Defaults to True.
            column_norms (optional): Per-column factors to use when
                ``scale_columns`` is set. When None they are the column
                means of the first batch. Defaults to None.
            encoder_layers (int, optional): Number of feature-width
                layers placed before the bottleneck. Defaults to 1.
            decoder_layers (int, optional): Number of feature-width
                layers placed after the bottleneck. Defaults to 1.
            activation_function (optional): Accepted for interface
                compatibility; not used by this constructor.
            strategy (optional): Forwarded to the base model and to the
                surrogate posterior builder. Defaults to None.
            dtype (optional): Working dtype. Defaults to tf.float64.
            **kwargs: Accepted and ignored.
        """
        # The two-argument super() must name a class in this instance's
        # MRO; naming DenseHorseshoe (a network, not a base of this
        # class) raises TypeError.  Start the lookup *after*
        # PoissonFactorization so that only the generic model
        # initialiser runs -- the factorization-specific constructor
        # takes a different set of arguments and would build state that
        # is replaced below.
        # NOTE(review): assumes the next class in the MRO accepts these
        # keywords, as PoissonFactorization.__init__ passes them itself.
        super(PoissonFactorization, self).__init__(
            data=data, data_transform_fn=data_transform_fn,
            strategy=strategy, dtype=dtype)
        self.dtype = dtype

        # Peek at one batch to learn the feature dimension and, when
        # needed, the default column normalization.
        record = next(iter(data))
        first_batch = tf.cast(record['data'], self.dtype)
        self.column_norm_factor = 1.

        if scale_columns:
            if column_norms is not None:
                self.column_norm_factor = tf.cast(
                    column_norms, self.dtype)
            else:
                col_means = tf.reduce_mean(
                    first_batch, axis=0, keepdims=True)
                # A column that is all zero in the first batch would
                # otherwise become a zero divisor in log_likelihood.
                self.column_norm_factor = tf.where(
                    col_means > 0, col_means, tf.ones_like(col_means))

        self.feature_dim = first_batch.shape[-1]
        self.latent_dim = (
            self.feature_dim if latent_dim is None else latent_dim)

        # Layer widths: encoder stack, bottleneck, decoder stack, output.
        layer_sizes = (
            [self.feature_dim] * encoder_layers
            + [self.latent_dim]
            + [self.feature_dim] * decoder_layers
            + [self.feature_dim])
        self.neural_network_model = DenseHorseshoe(
            self.feature_dim, layer_sizes, dtype=self.dtype)

        self.joint_prior = self.neural_network_model.joint_prior
        self.surrogate_distribution = build_surrogate_posterior(
            self.joint_prior, self.neural_network_model.bijectors,
            dtype=self.dtype,
            strategy=self.strategy)
        self.surrogate_vars = self.surrogate_distribution.variables

        self.var_list = list(self.surrogate_distribution.variables)

        self.set_calibration_expectations()

    def log_likelihood(self, data, **params):
        """Poisson log-likelihood of a batch under the given parameters.

        Args:
            data: Batch indexed with ``'data'`` to get the counts.
            **params: Network parameters handed to
                ``neural_network_model.assemble_networks``.

        Returns:
            The log-probabilities summed over the last two axes.
        """
        network_dtype = self.neural_network_model.dtype
        neural_networks = self.neural_network_model.assemble_networks(params)

        # The network sees column-normalized counts and emits log-rates.
        scaled_input = (
            tf.cast(data['data'], network_dtype)
            / tf.cast(self.column_norm_factor, network_dtype))
        rates = tf.cast(tf.math.exp(neural_networks(scaled_input)),
                        self.dtype)
        # Undo the input normalization so rates are on the count scale.
        rates *= self.column_norm_factor

        rv_poisson = tfd.Poisson(rate=rates)
        # Leading axis added to the counts so they broadcast against the
        # rates -- presumably a parameter-sample axis; TODO confirm.
        counts = tf.cast(data['data'], self.dtype)[tf.newaxis, ...]
        log_lik = rv_poisson.log_prob(counts)
        return tf.reduce_sum(log_lik, axis=[-2, -1])

    def unormalized_log_prob_parts(self, data=None, **params):
        """Return the prior log-prob parts plus the likelihood under 'x'.

        Args:
            data (optional): Batch to score. When None the next batch is
                drawn from the model's own dataset iterator.
            **params: Network parameters.

        Returns:
            The mapping produced by ``joint_prior.log_prob_parts`` with
            an extra ``'x'`` entry holding the log-likelihood.
        """
        if data is None:
            # use self.data, taking the next batch
            try:
                data = next(self.dataset_cycler)
            except tf.errors.OutOfRangeError:
                # Refresh the iterator that is actually read above; the
                # previous code refreshed only ``dataset_iterator``, so
                # the exhausted ``dataset_cycler`` was never replaced.
                # ``dataset_iterator`` is still assigned for any code
                # that relies on it.
                self.dataset_cycler = cycle(iter(self.data))
                self.dataset_iterator = self.dataset_cycler
                data = next(self.dataset_cycler)

        prior_parts = self.neural_network_model.joint_prior.log_prob_parts(
            params)
        prior_parts['x'] = self.log_likelihood(data, **params)
        return prior_parts

    def unormalized_log_prob(self, data=None, **params):
        """Sum of all parts from ``unormalized_log_prob_parts``."""
        prob_parts = self.unormalized_log_prob_parts(data, **params)
        return tf.add_n(list(prob_parts.values()))
127 changes: 0 additions & 127 deletions mederrata_spmf/poissonautoencoder.py

This file was deleted.