(Partial) update to PyTorch 1.4
t-vi committed Feb 2, 2020
1 parent c475839 commit e93fd0c
Showing 18 changed files with 354 additions and 376 deletions.
4 changes: 4 additions & 0 deletions README.md
@@ -1,6 +1,10 @@
# CandleGP - Gaussian Processes in Pytorch
*Thomas Viehmann*, [email protected]

+*Note:* This stems from before the Tensor/Variable merge, so it is really old.
+I recently needed parts of it and ported those to PyTorch 1.4, but there are rough edges w.r.t. the dimensionality of some quantities, and some bits remain unported.
+
+
I felt Bayesian the other day, so I ported (a tiny part of) the
excellent [GPFlow library](https://github.com/gpflow/gpflow) to
Pytorch.
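The note above refers to the Tensor/Variable merge of PyTorch 0.4. As a minimal illustration of what that merge means for this port (an editor's sketch, not part of the commit): plain tensors now carry autograd state, and new tensors are created with explicit `dtype`/`device` arguments instead of the old `out=x.data.new()` idiom seen in the pre-port code below.

```python
import torch

# Since PyTorch 0.4, gradients live directly on tensors; wrapping values in
# torch.autograd.Variable is a backwards-compatible no-op.
x = torch.randn(3, dtype=torch.float64, requires_grad=True)
loss = (x ** 2).sum()
loss.backward()
print(x.grad)  # gradient of sum(x^2) is 2*x

# New tensors are allocated with an explicit dtype/device matching an existing tensor.
eye = torch.eye(3, dtype=x.dtype, device=x.device)
```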
9 changes: 4 additions & 5 deletions candlegp/conditionals.py
@@ -15,7 +15,6 @@


import torch
-from torch.autograd import Variable
import numpy

def batch_tril(A):
@@ -68,11 +67,11 @@ def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False, jit
num_data = X.size(0) # M
num_func = f.size(1) # K
Kmn = kern.K(X, Xnew)
-Kmm = kern.K(X) + Variable(torch.eye(num_data, out=X.data.new())) * jitter_level
-Lm = torch.potrf(Kmm, upper=False)
+Kmm = kern.K(X) + torch.eye(num_data, dtype=X.dtype, device=X.device) * jitter_level
+Lm = torch.cholesky(Kmm, upper=False)

# Compute the projection matrix A
-A,_ = torch.gesv(Kmn, Lm)
+A, _ = torch.solve(Kmn, Lm)

# compute the covariance due to the conditioning
if full_cov:
@@ -85,7 +84,7 @@ def conditional(Xnew, X, kern, f, full_cov=False, q_sqrt=None, whiten=False, jit

# another backsubstitution in the unwhitened case (complete the inverse of the cholesky decomposition)
if not whiten:
-A,_ = torch.gesv(A, Lm.t())
+A,_ = torch.solve(A, Lm.t())

# construct the conditional mean
fmean = torch.matmul(A.t(), f)
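The two solves in `conditional` above implement the comment about completing the inverse of the Cholesky decomposition: one backsubstitution with `Lm` gives the whitened projection, and a second one with `Lm.t()` turns it into `Kmm^{-1} Kmn`. A small self-contained check of that identity (editor's sketch with made-up matrices, not project code):

```python
import torch

torch.manual_seed(0)
M, N_new = 4, 2
X = torch.randn(M, 3, dtype=torch.float64)
Kmm = X @ X.t() + 1e-3 * torch.eye(M, dtype=torch.float64)   # SPD "kernel" matrix
Kmn = torch.randn(M, N_new, dtype=torch.float64)

Lm = torch.cholesky(Kmm, upper=False)
A = torch.solve(Kmn, Lm)[0]        # Lm^{-1} Kmn  (used directly in the whitened case)
A2 = torch.solve(A, Lm.t())[0]     # Lm^{-T} Lm^{-1} Kmn = Kmm^{-1} Kmn (unwhitened case)

print(torch.allclose(A2, torch.inverse(Kmm) @ Kmn))  # True
```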
9 changes: 3 additions & 6 deletions candlegp/densities.py
@@ -16,16 +16,13 @@
import numpy
import scipy.special
import torch
-from torch.autograd import Variable

def gammaln(x):
# attention: Not differentiable!
if numpy.isscalar(x):
y = float(scipy.special.gammaln(x))
-elif isinstance(x,(torch.Tensor, torch.DoubleTensor)):
-y = type(x)(scipy.special.gammaln(x.numpy()))
-elif isinstance(x,Variable):
-y = Variable(type(x.data)(scipy.special.gammaln(x.data.numpy())))
+elif isinstance(x, torch.Tensor):
+y = torch.as_tensor(scipy.special.gammaln(x.numpy()), dtype=x.dtype)
else:
raise ValueError("Unsupported input type "+str(type(x)))
return y
@@ -81,7 +78,7 @@ def multivariate_normal(x, mu, L):
d = x - mu
if d.dim()==1:
d = d.unsqueeze(1)
-alpha,_ = torch.gesv(d, L)
+alpha,_ = torch.solve(d, L)
alpha = alpha.squeeze(1)
num_col = 1 if x.dim() == 1 else x.size(1)
num_dims = x.size(0)
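`multivariate_normal(x, mu, L)` above evaluates a Gaussian log-density from a lower Cholesky factor `L` of the covariance. A rough way to sanity-check it against PyTorch's own distribution class (an editor's sketch; it assumes the function returns a scalar log-density for 1-D `x` and `mu`):

```python
import torch
from candlegp import densities

torch.manual_seed(0)
N = 4
mu = torch.zeros(N, dtype=torch.float64)
A = torch.randn(N, N, dtype=torch.float64)
cov = A @ A.t() + 1e-6 * torch.eye(N, dtype=torch.float64)
L = torch.cholesky(cov, upper=False)
x = torch.randn(N, dtype=torch.float64)

ours = densities.multivariate_normal(x, mu, L)
ref = torch.distributions.MultivariateNormal(mu, scale_tril=L).log_prob(x)
print(float(ours), float(ref))  # should agree up to numerical error
```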
3 changes: 1 addition & 2 deletions candlegp/kernels.py
@@ -14,7 +14,6 @@
# limitations under the License.

import torch
-from torch.autograd import Variable
import numpy
from . import parameter

@@ -212,7 +211,7 @@ def K(self, X, X2=None, presliced=False):
d = self.variance.get().expand(X.size(0))
return torch.diag(d)
else:
-return Variable(X.data.new(X.size(0),X2.size(0)).zero_())
+return torch.zeros(X.size(0), X2.size(0), dtype=X.dtype, device=X.device)


class Constant(Static):
2 changes: 1 addition & 1 deletion candlegp/kullback_leiblers.py
@@ -100,7 +100,7 @@ def gauss_kl_diag(q_mu, q_sqrt, K):
KL += num_latent * torch.diag(L).log().sum() # Prior log-det term.
KL += -0.5 * q_sqrt.numel() # constant term
KL += - q_sqrt.log().sum() # Log-det of q-cov
-K_inv,_ = torch.potrs(Variable(torch.eye(L.size(0), out=L.data.new())), L, upper=False)
+K_inv = torch.cholesky_solve(torch.eye(L.size(0), dtype=L.dtype, device=L.device), L, upper=False)
KL += 0.5 * (torch.diag(K_inv).unsqueeze(1) * q_sqrt**2).sum() # Trace term.
return KL

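For the trace term above: the old `torch.potrs`, which solves a system given an already-computed Cholesky factor, corresponds to `torch.cholesky_solve` in PyTorch 1.x, and it returns a single tensor rather than a tuple. A minimal sketch of computing `K^{-1}` from a lower factor `L` this way (editor's illustration with a toy matrix):

```python
import torch

K = torch.tensor([[4.0, 1.0], [1.0, 3.0]], dtype=torch.float64)
L = torch.cholesky(K, upper=False)

# Solve K X = I using the factor L; this is the PyTorch 1.x spelling of torch.potrs.
K_inv = torch.cholesky_solve(torch.eye(2, dtype=K.dtype), L, upper=False)
print(torch.allclose(K @ K_inv, torch.eye(2, dtype=K.dtype)))  # True
```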
23 changes: 11 additions & 12 deletions candlegp/likelihoods.py
@@ -16,14 +16,13 @@

import numpy
import torch
-from torch.autograd import Variable
from . import parameter
from . import quadrature
from . import densities

class Likelihood(torch.nn.Module):
def __init__(self, name=None):
-super(Likelihood, self).__init__()
+super().__init__()
self.name = name
self.num_gauss_hermite_points = 20

@@ -86,7 +85,7 @@ def predict_density(self, Fmu, Fvar, Y):
Here, we implement a default Gauss-Hermite quadrature routine, but some
likelihoods (Gaussian, Poisson) will implement specific cases.
"""
-gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, ttype=type(Fmu.data))
+gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, dtype=Fmu.dtype)

gh_w = gh_w.reshape(-1, 1) / float(numpy.sqrt(numpy.pi))
shape = Fmu.size()
@@ -117,7 +116,7 @@ def variational_expectations(self, Fmu, Fvar, Y):
likelihoods (Gaussian, Poisson) will implement specific cases.
"""

-gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, ttype=type(Fmu.data))
+gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, dtype=Fmu.dtype)
gh_x = gh_x.view(1, -1)
gh_w = gh_w.view(-1, 1) / float(numpy.pi)**0.5
shape = Fmu.size()
@@ -143,9 +142,9 @@ def _check_targets(self, Y_np): # pylint: disable=R0201


class Gaussian(Likelihood):
-def __init__(self, ttype=torch.FloatTensor):
+def __init__(self, dtype=torch.float32):
Likelihood.__init__(self)
-self.variance = parameter.PositiveParam(1.0, ttype=ttype)
+self.variance = parameter.PositiveParam(torch.tensor([1.0], dtype=dtype), dtype=dtype)

def logp(self, F, Y):
return densities.gaussian(F, Y, self.variance.get())
@@ -251,13 +250,13 @@ def __init__(self, num_classes, epsilon=1e-3):

def __call__(self, F):
_,i = torch.max(F.data, 1)
-one_hot = Variable(F.data.new(F.size(0), self.num_classes).fill_(self._eps_K1).scatter_(1,i,1-self.epsilon))
+one_hot = torch.full((F.size(0), self.num_classes), self._eps_K1, dtype=F.dtype, device=F.device).scatter_(1, i, 1 - self.epsilon)
return one_hot

def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
Y = Y.long()
# work out what the mean and variance is of the indicated latent function.
-oh_on = Variable(mu.data.new(Y.numel(), self.num_classes).fill_(0.).scatter_(1,Y.data,1))
+oh_on = torch.zeros(Y.numel(), self.num_classes, dtype=mu.dtype, device=mu.device).scatter_(1, Y.data, 1)
mu_selected = (oh_on * mu ).sum(1)
var_selected = (oh_on * var).sum(1)

@@ -271,7 +270,7 @@ def prob_is_largest(self, Y, mu, var, gh_x, gh_w):
cdfs = cdfs * (1 - 2e-4) + 1e-4

# blank out all the distances on the selected latent function
-oh_off = Variable(mu.data.new(Y.numel(), self.num_classes).fill_(1.).scatter_(1,Y.data,0))
+oh_off = torch.ones(Y.numel(), self.num_classes, dtype=mu.dtype, device=mu.device).scatter_(1,Y.data,0)
cdfs = cdfs * oh_off.unsqueeze(2) + oh_on.unsqueeze(2)

# take the product over the latent functions, and the sum over the GH grid.
@@ -309,7 +308,7 @@ def logp(self, F, Y):

def variational_expectations(self, Fmu, Fvar, Y):
if isinstance(self.invlink, RobustMax):
-gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, ttype=type(Fmu.data))
+gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, dtype=Fmu.dtype)
p = self.invlink.prob_is_largest(Y, Fmu, Fvar, gh_x, gh_w)
return p * numpy.log(1 - self.invlink.epsilon) + (1. - p) * numpy.log(self.invlink._eps_K1)
else:
@@ -318,7 +317,7 @@ def variational_expectations(self, Fmu, Fvar, Y):
def predict_mean_and_var(self, Fmu, Fvar):
if isinstance(self.invlink, RobustMax):
# To compute this, we'll compute the density for each possible output
-possible_outputs = [Variable(Fmu.data.new().long().resize_(Fmu.size(0),1).fill_(i)) for i in range(self.num_classes)]
+possible_outputs = [torch.full((Fmu.size(0), 1), i, dtype=torch.long, device=Fmu.device) for i in range(self.num_classes)]
ps = [self._predict_non_logged_density(Fmu, Fvar, po) for po in possible_outputs]
ps = torch.stack([p.view(-1) for p in ps],1)
return ps, ps - ps**2
@@ -330,7 +329,7 @@ def predict_density(self, Fmu, Fvar, Y):

def _predict_non_logged_density(self, Fmu, Fvar, Y):
if isinstance(self.invlink, RobustMax):
-gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, ttype=type(Fmu.data))
+gh_x, gh_w = quadrature.hermgauss(self.num_gauss_hermite_points, dtype=Fmu.dtype)
p = self.invlink.prob_is_largest(Y, Fmu, Fvar, gh_x, gh_w)
return p * (1 - self.invlink.epsilon) + (1. - p) * (self.invlink._eps_K1)
else:
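The default `predict_density` and `variational_expectations` above integrate the likelihood against a Gaussian using Gauss-Hermite quadrature; `quadrature.hermgauss` now takes a `dtype` instead of a tensor type. As a rough sketch of the underlying recipe (an editor's illustration of the idea, not the project's `quadrature` module): for `x ~ N(Fmu, Fvar)`, `E[f(x)] ≈ (1/sqrt(pi)) * sum_i w_i f(Fmu + sqrt(2*Fvar) * x_i)` with `(x_i, w_i)` the Hermite nodes and weights.

```python
import numpy
import torch

def gauss_hermite_expectation(f, Fmu, Fvar, n_points=20):
    # Nodes/weights for the physicists' Hermite weight exp(-x^2).
    x, w = numpy.polynomial.hermite.hermgauss(n_points)
    x = torch.as_tensor(x, dtype=Fmu.dtype)
    w = torch.as_tensor(w, dtype=Fmu.dtype)
    # Change of variables to integrate against N(Fmu, Fvar).
    X = Fmu.reshape(-1, 1) + (2.0 * Fvar.reshape(-1, 1)).sqrt() * x   # (N, n_points)
    return (f(X) * w).sum(dim=1) / numpy.sqrt(numpy.pi)

# E[x^2] under N(0, 1) is 1:
print(gauss_hermite_expectation(lambda x: x ** 2,
                                torch.zeros(1, dtype=torch.float64),
                                torch.ones(1, dtype=torch.float64)))
```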
3 changes: 1 addition & 2 deletions candlegp/mean_functions.py
@@ -14,7 +14,6 @@
# limitations under the License.

import torch
-from torch.autograd import Variable
from . import parameter

class MeanFunction(torch.nn.Module):
@@ -40,7 +39,7 @@ def __mul__(self, other):

class Zero(MeanFunction):
def forward(self, X):
-return Variable(X.data.new(X.size(0),1).zero_())
+return torch.zeros(X.size(0), 1, dtype=X.dtype, device=X.device)

class Linear(MeanFunction):
"""
1 change: 1 addition & 0 deletions candlegp/models/__init__.py
@@ -5,3 +5,4 @@
from .vgp import VGP
from .gpmc import GPMC
from .sgpmc import SGPMC
+from .gplvm import GPLVM, BayesianGPLVM, PCA_reduce
15 changes: 7 additions & 8 deletions candlegp/models/gpr.py
@@ -14,7 +14,6 @@
# limitations under the License.

import torch
-from torch.autograd import Variable

from .. import likelihoods
from .. import densities
@@ -40,7 +39,7 @@ def __init__(self, X, Y, kern, mean_function=None, **kwargs):
Y is a data matrix, size N x R
kern, mean_function are appropriate GPflow objects
"""
-likelihood = likelihoods.Gaussian(ttype=type(X.data))
+likelihood = likelihoods.Gaussian(dtype=X.dtype)
super(GPR,self).__init__(X, Y, kern, likelihood, mean_function, **kwargs)
self.num_latent = Y.size(1)

@@ -52,8 +51,8 @@ def compute_log_likelihood(self, X = None, Y = None):
"""
assert X is None and Y is None, "{} does not support minibatch mode".format(str(type(self)))
-K = self.kern.K(self.X) + Variable(torch.eye(self.X.size(0),out=self.X.data.new())) * self.likelihood.variance.get()
-L = torch.potrf(K, upper=False)
+K = self.kern.K(self.X) + torch.eye(self.X.size(0), dtype=self.X.dtype, device=self.X.device) * self.likelihood.variance.get()
+L = torch.cholesky(K, upper=False)
m = self.mean_function(self.X)
return densities.multivariate_normal(self.Y, m, L)

@@ -69,10 +68,10 @@ def predict_f(self, Xnew, full_cov=False):
"""
Kx = self.kern.K(self.X, Xnew)
-K = self.kern.K(self.X) + Variable(torch.eye(self.X.size(0),out=self.X.data.new())) * self.likelihood.variance.get()
-L = torch.potrf(K, upper=False)
-A,_ = torch.gesv(Kx, L) # could use triangular solve, note gesv has B first, then A in AX=B
-V,_ = torch.gesv(self.Y - self.mean_function(self.X),L) # could use triangular solve
+K = self.kern.K(self.X) + torch.eye(self.X.size(0), dtype=self.X.dtype, device=self.X.device) * self.likelihood.variance.get()
+L = torch.cholesky(K, upper=False)
+A,_ = torch.solve(Kx, L) # could use triangular solve, note gesv has B first, then A in AX=B
+V,_ = torch.solve(self.Y - self.mean_function(self.X),L) # could use triangular solve
fmean = torch.mm(A.t(), V) + self.mean_function(Xnew)
if full_cov:
fvar = self.kern.K(Xnew) - torch.mm(A.t(), A)
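The "could use triangular solve" comments above refer to the fact that `L` is a lower-triangular Cholesky factor, so the general `torch.solve` can be replaced by `torch.triangular_solve`. A small check of the equivalence (editor's sketch; both calls take the right-hand side first and return a tuple whose first element is the solution):

```python
import torch

torch.manual_seed(0)
N = 5
K = torch.eye(N, dtype=torch.float64) * 2.0
L = torch.cholesky(K, upper=False)
B = torch.randn(N, 3, dtype=torch.float64)

A_general, _ = torch.solve(B, L)                       # treats L as a general matrix
A_tri, _ = torch.triangular_solve(B, L, upper=False)   # exploits the triangular structure
print(torch.allclose(A_general, A_tri))  # True
```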
7 changes: 3 additions & 4 deletions candlegp/models/model.py
@@ -16,7 +16,6 @@
import abc
import torch
import numpy
-from torch.autograd import Variable

from .. import mean_functions
from .. import parameter
@@ -111,11 +110,11 @@ def predict_f_samples(self, Xnew, num_samples):
Xnew.
"""
mu, var = self.predict_f(Xnew, full_cov=True)
-jitter = Variable(torch.eye(mu.size(0), out=mu.data.new())) * self.jitter_level
+jitter = torch.eye(mu.size(0), dtype=mu.dtype, device=mu.device) * self.jitter_level
samples = []
for i in range(self.num_latent): # TV-Todo: batch??
-L = torch.potrf(var[:, :, i] + jitter, upper=False)
-V = Variable(mu.data.new(L.size(0), num_samples).normal_())
+L = torch.cholesky(var[:, :, i] + jitter, upper=False)
+V = torch.randn(L.size(0), num_samples, dtype=L.dtype, device=L.device)
samples.append(mu[:, i:i + 1] + torch.matmul(L, V))
return torch.stack(samples, dim=0) # TV-Todo: transpose?

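`predict_f_samples` above draws posterior samples per latent function by adding jitter to the covariance, taking its Cholesky factor, and multiplying standard normal draws. The same draw could be expressed with `torch.distributions` (editor's sketch with made-up shapes; note the transpose needed to match the `(N, num_samples)` layout used above):

```python
import torch

torch.manual_seed(0)
N, num_samples = 6, 3
mu = torch.zeros(N, dtype=torch.float64)
var = torch.eye(N, dtype=torch.float64)
jitter = torch.eye(N, dtype=mu.dtype, device=mu.device) * 1e-6

L = torch.cholesky(var + jitter, upper=False)
V = torch.randn(L.size(0), num_samples, dtype=L.dtype, device=L.device)
by_hand = mu.unsqueeze(1) + L @ V          # (N, num_samples), as in the model code

dist = torch.distributions.MultivariateNormal(mu, scale_tril=L)
by_dist = dist.sample((num_samples,)).t()  # also (N, num_samples)
```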