(Partial) update to PyTorch 1.4
t-vi committed Feb 2, 2020
1 parent c475839 commit e93fd0c
Showing 18 changed files with 354 additions and 376 deletions.
4 changes: 4 additions & 0 deletions README.md
@@ -1,6 +1,10 @@
# CandleGP - Gaussian Processes in Pytorch
*Thomas Viehmann*, [email protected]

+*Note:* This stems from before the Tensor/Variable merge, so it is really old.
+I recently needed parts of it and ported those to PyTorch 1.4, but there are rough edges w.r.t. the dimensionality of some quantities, and some bits remain unported.
+
+
I felt Bayesian the other day, so I ported (a tiny part of) the
excellent [GPFlow library](https://github.com/gpflow/gpflow) to
Pytorch.
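The note above refers to the Tensor/Variable merge of PyTorch 0.4. As a minimal illustration of what that merge means for this port (an editor's sketch, not part of the commit): plain tensors now carry autograd state, and new tensors are created with explicit `dtype`/`device` arguments instead of the old `out=x.data.new()` idiom seen in the pre-port code below.

```python
import torch

# Since PyTorch 0.4, gradients live directly on tensors; wrapping values in
# torch.autograd.Variable is a backwards-compatible no-op.
x = torch.randn(3, dtype=torch.float64, requires_grad=True)
loss = (x ** 2).sum()
loss.backward()
print(x.grad)  # gradient of sum(x^2) is 2*x

# New tensors are allocated with an explicit dtype/device matching an existing tensor.
eye = torch.eye(3, dtype=x.dtype, device=x.device)
```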
9 changes: 4 additions & 5 deletions candlegp/conditionals.py
@@ -15,7 +15,6 @@


import torch
-from torch.autograd import Variable
import numpy

def batch_tril(A):
@@ -68,11 +67,11 @@ def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False, jit
num_data = X.size(0) # M
num_func = f.size(1) # K
Kmn = kern.K(X, Xnew)
-Kmm = kern.K(X) + Variable(torch.eye(num_data, out=X.data.new())) * jitter_level
-Lm = torch.potrf(Kmm, upper=False)
+Kmm = kern.K(X) + torch.eye(num_data, dtype=X.dtype, device=X.device) * jitter_level
+Lm = torch.cholesky(Kmm, upper=False)

# Compute the projection matrix A
-A,_ = torch.gesv(Kmn, Lm)
+A, _ = torch.solve(Kmn, Lm)

# compute the covariance due to the conditioning
if full_cov:
@@ -85,7 +84,7 @@ def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False, jit

# another backsubstitution in the unwhitened case (complete the inverse of the cholesky decomposition)
if not whiten:
-A,_ = torch.gesv(A, Lm.t())
+A,_ = torch.solve(A, Lm.t())

# construct the conditional mean
fmean = torch.matmul(A.t(), f)
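The two solves in `conditional` above implement the comment about completing the inverse of the Cholesky decomposition: one backsubstitution with `Lm` gives the whitened projection, and a second one with `Lm.t()` turns it into `Kmm^{-1} Kmn`. A small self-contained check of that identity (editor's sketch with made-up matrices, not project code):

```python
import torch

torch.manual_seed(0)
M, N_new = 4, 2
X = torch.randn(M, 3, dtype=torch.float64)
Kmm = X @ X.t() + 1e-3 * torch.eye(M, dtype=torch.float64)   # SPD "kernel" matrix
Kmn = torch.randn(M, N_new, dtype=torch.float64)

Lm = torch.cholesky(Kmm, upper=False)
A = torch.solve(Kmn, Lm)[0]        # Lm^{-1} Kmn  (used directly in the whitened case)
A2 = torch.solve(A, Lm.t())[0]     # Lm^{-T} Lm^{-1} Kmn = Kmm^{-1} Kmn (unwhitened case)

print(torch.allclose(A2, torch.inverse(Kmm) @ Kmn))  # True
```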
9 changes: 3 additions & 6 deletions candlegp/densities.py
@@ -16,16 +16,13 @@
import numpy
import scipy.special
import torch
-from torch.autograd import Variable

def gammaln(x):
# attention: Not differentiable!
if numpy.isscalar(x):
y = float(scipy.special.gammaln(x))
-elif isinstance(x,(torch.Tensor, torch.DoubleTensor)):
-y = type(x)(scipy.special.gammaln(x.numpy()))
-elif isinstance(x,Variable):
-y = Variable(type(x.data)(scipy.special.gammaln(x.data.numpy())))
+elif isinstance(x, torch.Tensor):
+y = torch.as_tensor(scipy.special.gammaln(x.numpy()), dtype=x.dtype)
else:
raise ValueError("Unsupported input type "+str(type(x)))
return y
@@ -81,7 +78,7 @@ def multivariate_normal(x, mu, L):
d = x - mu
if d.dim()==1:
d = d.unsqueeze(1)
-alpha,_ = torch.gesv(d, L)
+alpha,_ = torch.solve(d, L)
alpha = alpha.squeeze(1)
num_col = 1 if x.dim() == 1 else x.size(1)
num_dims = x.size(0)
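`multivariate_normal(x, mu, L)` above evaluates a Gaussian log-density from a lower Cholesky factor `L` of the covariance. A rough way to sanity-check it against PyTorch's own distribution class (an editor's sketch; it assumes the function returns a scalar log-density for 1-D `x` and `mu`):

```python
import torch
from candlegp import densities

torch.manual_seed(0)
N = 4
mu = torch.zeros(N, dtype=torch.float64)
A = torch.randn(N, N, dtype=torch.float64)
cov = A @ A.t() + 1e-6 * torch.eye(N, dtype=torch.float64)
L = torch.cholesky(cov, upper=False)
x = torch.randn(N, dtype=torch.float64)

ours = densities.multivariate_normal(x, mu, L)
ref = torch.distributions.MultivariateNormal(mu, scale_tril=L).log_prob(x)
print(float(ours), float(ref))  # should agree up to numerical error
```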
3 changes: 1 addition & 2 deletions candlegp/kernels.py
@@ -14,7 +14,6 @@
# limitations under the License.

import torch
-from torch.autograd import Variable
import numpy
from . import parameter

@@ -212,7 +211,7 @@ def K(self, X, X2=None, presliced=False):
d = self.variance.get().expand(X.size(0))
return torch.diag(d)
else:
-return Variable(X.data.new(X.size(0),X2.size(0)).zero_())
+return torch.zeros(X.size(0), X2.size(0), dtype=X.dtype, device=X.device)


class Constant(Static):
2 changes: 1 addition & 1 deletion candlegp/kullback_leiblers.py
@@ -100,7 +100,7 @@ def gauss_kl_diag(q_mu, q_sqrt, K):
KL += num_latent * torch.diag(L).log().sum() # Prior log-det term.
KL += -0.5 * q_sqrt.numel() # constant term
KL += - q_sqrt.log().sum() # Log-det of q-cov
-K_inv,_ = torch.potrs(Variable(torch.eye(L.size(0), out=L.data.new())), L, upper=False)
+K_inv = torch.cholesky_solve(torch.eye(L.size(0), dtype=L.dtype, device=L.device), L, upper=False)
KL += 0.5 * (torch.diag(K_inv).unsqueeze(1) * q_sqrt**2).sum() # Trace term.
return KL

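For the trace term above: the old `torch.potrs`, which solves a system given an already-computed Cholesky factor, corresponds to `torch.cholesky_solve` in PyTorch 1.x, and it returns a single tensor rather than a tuple. A minimal sketch of computing `K^{-1}` from a lower factor `L` this way (editor's illustration with a toy matrix):

```python
import torch

K = torch.tensor([[4.0, 1.0], [1.0, 3.0]], dtype=torch.float64)
L = torch.cholesky(K, upper=False)

# Solve K X = I using the factor L; this is the PyTorch 1.x spelling of torch.potrs.
K_inv = torch.cholesky_solve(torch.eye(2, dtype=K.dtype), L, upper=False)
print(torch.allclose(K @ K_inv, torch.eye(2, dtype=K.dtype)))  # True
```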
23 changes: 11 additions & 12 deletions candlegp/likelihoods.py
@@ -16,14 +16,13 @@

import numpy
import torch
-from torch.autograd import Variable
from . import parameter
from . import quadrature
from . import densities

class Likelihood(torch.nn.Module):
def __init__(self, name=None):
-super(Likelihood, self).__init__()
+super().__init__()
self.name = name
self.num_gauss_hermite_points = 20

@@ -86,7 +85,7 @@ def predict_density(self, Fmu, Fvar, Y):
Here, we implement a default Gauss-Hermite quadrature routine, but some
likelihoods (Gaussian, Poisson) will implement specific cases.
"""
-gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, ttype=type(Fmu.data))
+gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, dtype=Fmu.dtype)

gh_w = gh_w.reshape(-1, 1) / float(numpy.sqrt(numpy.pi))
shape = Fmu.size()
@@ -117,7 +116,7 @@ def variational_expectations(self, Fmu, Fvar, Y):
likelihoods (Gaussian, Poisson) will implement specific cases.
"""

-gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, ttype=type(Fmu.data))
+gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, dtype=Fmu.dtype)
gh_x = gh_x.view(1, -1)
gh_w = gh_w.view(-1, 1) / float(numpy.pi)**0.5
shape = Fmu.size()
@@ -143,9 +142,9 @@ def _check_targets(self, Y_np): # pylint: disable=R0201


class Gaussian(Likelihood):
-def __init__(self, ttype=torch.FloatTensor):
+def __init__(self, dtype=torch.float32):
Likelihood.__init__(self)
-self.variance = parameter.PositiveParam(1.0, ttype=ttype)
+self.variance = parameter.PositiveParam(torch.tensor([1.0], dtype=dtype), dtype=dtype)

def logp(self, F, Y):
return densities.gaussian(F, Y, self.variance.get())
@@ -251,13 +250,13 @@ def __init__(self, num_classes, epsilon=1e-3):

def __call__(self, F):
_,i = torch.max(F.data, 1)
-one_hot = Variable(F.data.new(F.size(0), self.num_classes).fill_(self._eps_K1).scatter_(1,i,1-self.epsilon))
+one_hot = torch.full((F.size(0), self.num_classes), self._eps_K1, dtype=F.dtype, device=F.device).scatter_(1, i, 1 - self.epsilon)
return one_hot

def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
Y = Y.long()
# work out what the mean and variance is of the indicated latent function.
-oh_on = Variable(mu.data.new(Y.numel(), self.num_classes).fill_(0.).scatter_(1,Y.data,1))
+oh_on = torch.zeros(Y.numel(), self.num_classes, dtype=mu.dtype, device=mu.device).scatter_(1, Y.data, 1)
mu_selected = (oh_on * mu ).sum(1)
var_selected = (oh_on * var).sum(1)

@@ -271,7 +270,7 @@ def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
cdfs = cdfs * (1 - 2e-4) + 1e-4

# blank out all the distances on the selected latent function
-oh_off = Variable(mu.data.new(Y.numel(), self.num_classes).fill_(1.).scatter_(1,Y.data,0))
+oh_off = torch.ones(Y.numel(), self.num_classes, dtype=mu.dtype, device=mu.device).scatter_(1,Y.data,0)
cdfs = cdfs * oh_off.unsqueeze(2) + oh_on.unsqueeze(2)

# take the product over the latent functions, and the sum over the GH grid.
@@ -309,7 +308,7 @@ def logp(self, F, Y):

def variational_expectations(self, Fmu, Fvar, Y):
if isinstance(self.invlink, RobustMax):
-gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, ttype=type(Fmu.data))
+gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, dtype=Fmu.dtype)
p = self.invlink.prob_is_largest(Y, Fmu, Fvar, gh_x, gh_w)
return p * numpy.log(1 - self.invlink.epsilon) + (1. - p) * numpy.log(self.invlink._eps_K1)
else:
@@ -318,7 +317,7 @@ def variational_expectations(self, Fmu, Fvar, Y):
def predict_mean_and_var(self, Fmu, Fvar):
if isinstance(self.invlink, RobustMax):
# To compute this, we'll compute the density for each possible output
-possible_outputs = [Variable(Fmu.data.new().long().resize_(Fmu.size(0),1).fill_(i)) for i in range(self.num_classes)]
+possible_outputs = [torch.full((Fmu.size(0), 1), i, dtype=torch.long, device=Fmu.device) for i in range(self.num_classes)]
ps = [self._predict_non_logged_density(Fmu, Fvar, po) for po in possible_outputs]
ps = torch.stack([p.view(-1) for p in ps],1)
return ps, ps - ps**2
@@ -330,7 +329,7 @@ def predict_density(self, Fmu, Fvar, Y):

def _predict_non_logged_density(self, Fmu, Fvar, Y):
if isinstance(self.invlink, RobustMax):
-gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, ttype=type(Fmu.data))
+gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, dtype=Fmu.dtype)
p = self.invlink.prob_is_largest(Y, Fmu, Fvar, gh_x, gh_w)
return p * (1 - self.invlink.epsilon) + (1. - p) * (self.invlink._eps_K1)
else:
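The default `predict_density` and `variational_expectations` above integrate the likelihood against a Gaussian using Gauss-Hermite quadrature; `quadrature.hermgauss` now takes a `dtype` instead of a tensor type. As a rough sketch of the underlying recipe (an editor's illustration of the idea, not the project's `quadrature` module): for `x ~ N(Fmu, Fvar)`, `E[f(x)] ≈ (1/sqrt(pi)) * sum_i w_i f(Fmu + sqrt(2*Fvar) * x_i)` with `(x_i, w_i)` the Hermite nodes and weights.

```python
import numpy
import torch

def gauss_hermite_expectation(f, Fmu, Fvar, n_points=20):
    # Nodes/weights for the physicists' Hermite weight exp(-x^2).
    x, w = numpy.polynomial.hermite.hermgauss(n_points)
    x = torch.as_tensor(x, dtype=Fmu.dtype)
    w = torch.as_tensor(w, dtype=Fmu.dtype)
    # Change of variables to integrate against N(Fmu, Fvar).
    X = Fmu.reshape(-1, 1) + (2.0 * Fvar.reshape(-1, 1)).sqrt() * x   # (N, n_points)
    return (f(X) * w).sum(dim=1) / numpy.sqrt(numpy.pi)

# E[x^2] under N(0, 1) is 1:
print(gauss_hermite_expectation(lambda x: x ** 2,
                                torch.zeros(1, dtype=torch.float64),
                                torch.ones(1, dtype=torch.float64)))
```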
3 changes: 1 addition & 2 deletions candlegp/mean_functions.py
@@ -14,7 +14,6 @@
# limitations under the License.

import torch
-from torch.autograd import Variable
from . import parameter

class MeanFunction(torch.nn.Module):
@@ -40,7 +39,7 @@ def __mul__(self, other):

class Zero(MeanFunction):
def forward(self, X):
-return Variable(X.data.new(X.size(0),1).zero_())
+return torch.zeros(X.size(0), 1, dtype=X.dtype, device=X.device)

class Linear(MeanFunction):
"""
1 change: 1 addition & 0 deletions candlegp/models/__init__.py
@@ -5,3 +5,4 @@
from .vgp import VGP
from .gpmc import GPMC
from .sgpmc import SGPMC
+from .gplvm import GPLVM, BayesianGPLVM, PCA_reduce
15 changes: 7 additions & 8 deletions candlegp/models/gpr.py
@@ -14,7 +14,6 @@
# limitations under the License.

import torch
-from torch.autograd import Variable

from .. import likelihoods
from .. import densities
@@ -40,7 +39,7 @@ def __init__(self, X, Y, kern, mean_function=None, **kwargs):
Y is a data matrix, size N x R
kern, mean_function are appropriate GPflow objects
"""
-likelihood = likelihoods.Gaussian(ttype=type(X.data))
+likelihood = likelihoods.Gaussian(dtype=X.dtype)
super(GPR,self).__init__(X, Y, kern, likelihood, mean_function, **kwargs)
self.num_latent = Y.size(1)

@@ -52,8 +51,8 @@ def compute_log_likelihood(self, X = None, Y = None):
"""
assert X is None and Y is None, "{} does not support minibatch mode".format(str(type(self)))
-K = self.kern.K(self.X) + Variable(torch.eye(self.X.size(0),out=self.X.data.new())) * self.likelihood.variance.get()
-L = torch.potrf(K, upper=False)
+K = self.kern.K(self.X) + torch.eye(self.X.size(0), dtype=self.X.dtype, device=self.X.device) * self.likelihood.variance.get()
+L = torch.cholesky(K, upper=False)
m = self.mean_function(self.X)
return densities.multivariate_normal(self.Y, m, L)

@@ -69,10 +68,10 @@ def predict_f(self, Xnew, full_cov=False):
"""
Kx = self.kern.K(self.X, Xnew)
-K = self.kern.K(self.X) + Variable(torch.eye(self.X.size(0),out=self.X.data.new())) * self.likelihood.variance.get()
-L = torch.potrf(K, upper=False)
-A,_ = torch.gesv(Kx, L) # could use triangular solve, note gesv has B first, then A in AX=B
-V,_ = torch.gesv(self.Y - self.mean_function(self.X),L) # could use triangular solve
+K = self.kern.K(self.X) + torch.eye(self.X.size(0), dtype=self.X.dtype, device=self.X.device) * self.likelihood.variance.get()
+L = torch.cholesky(K, upper=False)
+A,_ = torch.solve(Kx, L) # could use triangular solve, note gesv has B first, then A in AX=B
+V,_ = torch.solve(self.Y - self.mean_function(self.X),L) # could use triangular solve
fmean = torch.mm(A.t(), V) + self.mean_function(Xnew)
if full_cov:
fvar = self.kern.K(Xnew) - torch.mm(A.t(), A)
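The "could use triangular solve" comments above refer to the fact that `L` is a lower-triangular Cholesky factor, so the general `torch.solve` can be replaced by `torch.triangular_solve`. A small check of the equivalence (editor's sketch; both calls take the right-hand side first and return a tuple whose first element is the solution):

```python
import torch

torch.manual_seed(0)
N = 5
K = torch.eye(N, dtype=torch.float64) * 2.0
L = torch.cholesky(K, upper=False)
B = torch.randn(N, 3, dtype=torch.float64)

A_general, _ = torch.solve(B, L)                       # treats L as a general matrix
A_tri, _ = torch.triangular_solve(B, L, upper=False)   # exploits the triangular structure
print(torch.allclose(A_general, A_tri))  # True
```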
7 changes: 3 additions & 4 deletions candlegp/models/model.py
@@ -16,7 +16,6 @@
import abc
import torch
import numpy
-from torch.autograd import Variable

from .. import mean_functions
from .. import parameter
@@ -111,11 +110,11 @@ def predict_f_samples(self, Xnew, num_samples):
Xnew.
"""
mu, var = self.predict_f(Xnew, full_cov=True)
-jitter = Variable(torch.eye(mu.size(0), out=mu.data.new())) * self.jitter_level
+jitter = torch.eye(mu.size(0), dtype=mu.dtype, device=mu.device) * self.jitter_level
samples = []
for i in range(self.num_latent): # TV-Todo: batch??
-L = torch.potrf(var[:, :, i] + jitter, upper=False)
-V = Variable(mu.data.new(L.size(0), num_samples).normal_())
+L = torch.cholesky(var[:, :, i] + jitter, upper=False)
+V = torch.randn(L.size(0), num_samples, dtype=L.dtype, device=L.device)
samples.append(mu[:, i:i + 1] + torch.matmul(L, V))
return torch.stack(samples, dim=0) # TV-Todo: transpose?

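`predict_f_samples` above draws posterior samples per latent function by adding jitter to the covariance, taking its Cholesky factor, and multiplying standard normal draws. The same draw could be expressed with `torch.distributions` (editor's sketch with made-up shapes; note the transpose needed to match the `(N, num_samples)` layout used above):

```python
import torch

torch.manual_seed(0)
N, num_samples = 6, 3
mu = torch.zeros(N, dtype=torch.float64)
var = torch.eye(N, dtype=torch.float64)
jitter = torch.eye(N, dtype=mu.dtype, device=mu.device) * 1e-6

L = torch.cholesky(var + jitter, upper=False)
V = torch.randn(L.size(0), num_samples, dtype=L.dtype, device=L.device)
by_hand = mu.unsqueeze(1) + L @ V          # (N, num_samples), as in the model code

dist = torch.distributions.MultivariateNormal(mu, scale_tril=L)
by_dist = dist.sample((num_samples,)).t()  # also (N, num_samples)
```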