% gp_regression_model.m
classdef gp_regression_model < gpmodel
    properties
        type = 'regression';
    end
    methods
        function [output1, sigma2_y, dmuy_dx, dsigma2_dx, Sigma2_y, dSigma2_dx, post] = prediction(model, theta, xtrain, ytrain, xtest, post)
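            % Posterior prediction at xtest given (xtrain, ytrain) and hyperparameters theta.
            % Outputs: posterior mean (output1), pointwise variance (sigma2_y), their
            % gradients with respect to xtest (dmuy_dx, dsigma2_dx), the full predictive
            % covariance (Sigma2_y) and its gradient (dSigma2_dx), and the cached posterior
            % quantities (post). If xtest is empty, output1 is the posterior structure instead.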
            regularization = model.regularization;
            kernelfun = model.kernelfun;
            meanfun = model.meanfun;
            % Recompute the posterior quantities if none were supplied or if the
            % caller explicitly requests them as the seventh output.
            if nargin < 6 || isempty(post) || nargout == 7
                clear('post')
                comp_post = true;
            else
                comp_post = false;
            end
            if isempty(xtrain)
                % No training data: return the prior mean and covariance
                mu_y = meanfun(xtest, theta.mean)';
                Sigma2_y = kernelfun(theta.cov, xtest, xtest, true, regularization);
                sigma2_y = diag(Sigma2_y);
                dmuy_dx = [];
                dsigma2_dx = [];
                dSigma2_dx = [];
                post = [];
                output1 = mu_y;
            else
                ytrain = ytrain(:);
                ntest = size(xtest, 2);
                nd = size(xtrain, 1); % input dimension (points are stored as columns)
                if comp_post
                    K = kernelfun(theta.cov, xtrain, xtrain, true, regularization);
                    L = chol(K)'; % in Rasmussen and Williams, L = cholesky(K) is lower triangular, whereas MATLAB's chol returns the upper factor
                    invK = L' \ (L \ eye(size(K, 1))); % K^-1 via triangular solves rather than inv(K)
                    post.invK = invK;
                    post.L = L;
                    post.K = K;
                else
                    K = post.K;
                    L = post.L;
                    invK = post.invK;
                end
                if ~isempty(xtest)
                    if nargout > 2
                        [k, ~, dk_dx] = kernelfun(theta.cov, xtrain, xtest, false, 'no');
                    else
                        k = kernelfun(theta.cov, xtrain, xtest, false, 'no');
                    end
                    prior_mean_tr = meanfun(xtrain, theta.mean)';
                    if nargout > 2
                        [prior_mean_test, ~, dprior_mean_test_dx] = meanfun(xtest, theta.mean);
                        [ks, ~, dks_dx] = kernelfun(theta.cov, xtest, xtest, false, 'no');
                        V = reshape(dks_dx, ntest*ntest, [], nd);
                        ddiagks_dx = V(1:ntest+1:end, :, :); % derivatives of the diagonal of ks
                    else
                        prior_mean_test = meanfun(xtest, theta.mean);
                        ks = kernelfun(theta.cov, xtest, xtest, false, 'no');
                    end
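                    % Predictive equations (Rasmussen and Williams, ch. 2):
                    %   mu*     = m(x*) + k*' * inv(K) * (y - m(X))
                    %   Sigma2* = k(x*, x*) - k*' * inv(K) * k*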
                    alpha = L' \ (L \ (ytrain - prior_mean_tr(:))); % Algorithm 2.1 (p. 19), eq. 2.25 in Rasmussen and Williams: alpha = inv(K)*(ytrain - mean)
                    mu_y = prior_mean_test(:) + k'*alpha;
                    diag_ks = diag(ks);
                    if nargout > 1
                        v = L \ k;
                        sigma2_y = diag_ks - sum(v.*v, 1)'; % diag(ks - v'*v) without forming the full matrix
                        sigma2_y(sigma2_y < 0) = 0; % clip small negative values caused by round-off
                    end
                    if nargout > 4
                        Sigma2_y = ks - v'*v; % full predictive covariance (see eq. 2.24 in Rasmussen and Williams), = ks - (k'/K)*k
                        if isequal(xtrain, xtest)
                            Sigma2_y = (Sigma2_y + Sigma2_y')/2; % enforce symmetry
                        end
                    end
                    if nargout > 2
                        % Gradients of the predictive mean and (co)variance with respect
                        % to xtest. The dprior_mean_test_dx term is omitted here, which
                        % is exact only for a mean function that is constant in x.
                        dmuy_dx = squeeze(mtimesx(dk_dx, 'T', alpha));
                        dsigma2_dx = 2*squeeze(ddiagks_dx) - 2*squeeze(sum(mtimesx(dk_dx, 'T', invK).*k', 2));
                        dSigma2_dx = dks_dx - 2*mtimesx(mtimesx(dk_dx, 'T', invK), k);
                    end
                    output1 = mu_y;
                else
                    % No test points: return the cached posterior structure instead
                    output1 = post;
                end
            end
        end
        %%
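        % Negative log marginal likelihood (eq. 2.30 in Rasmussen and Williams):
        %   negL(theta) = 0.5*(y - m)'*inv(K)*(y - m) + 0.5*log(det(K)) + 0.5*N*log(2*pi)
        % where log(det(K)) = 2*sum(log(diag(L))) for the Cholesky factor K = L*L'.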
        function [negL, dnegL] = negloglike(model, theta, xtrain, y_tr)
            N = numel(y_tr);
            % Covariance (and its derivative w.r.t. the hyperparameters, if requested)
            if nargout > 1
                [K, dK] = model.kernelfun(theta.cov, xtrain, xtrain, true, model.regularization);
            else
                K = model.kernelfun(theta.cov, xtrain, xtrain, true, model.regularization);
            end
            % Prior mean (and its derivative w.r.t. the hyperparameters, if requested)
            if nargout > 1
                [prior_mean, dprior_mean_dtheta] = model.meanfun(xtrain, theta.mean);
            else
                prior_mean = model.meanfun(xtrain, theta.mean);
            end
            L = chol(K)'; % in Rasmussen and Williams, L = cholesky(K) is lower triangular, whereas MATLAB's chol returns the upper factor
            alpha = L' \ (L \ (y_tr(:) - prior_mean(:))); % Algorithm 2.1, eq. 2.25 in Rasmussen and Williams
            negL = 0.5*(y_tr(:) - prior_mean(:))'*alpha + sum(log(diag(L))) + 0.5*N*log(2*pi); % eq. 2.30, Algorithm 2.1
            nhyp = numel(theta.cov) + numel(theta.mean); % number of hyperparameters
            invK = L' \ (L \ eye(N)); % K^-1 via triangular solves rather than inv(L')*inv(L)
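            % Gradient of negL (eq. 5.9 in Rasmussen and Williams):
            %   dnegL/dtheta_j = -0.5*trace((alpha*alpha' - inv(K)) * dK/dtheta_j)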
            if nargout > 1
                dnegL = zeros(nhyp, 1);
                for j = 1:numel(theta.cov)
                    dnegL(j) = -0.5*trace((alpha*alpha' - invK)*dK(:, :, j));
                end
                % Gradient with respect to the mean hyperparameters
                dnegL(numel(theta.cov)+1:nhyp) = -dprior_mean_dtheta*alpha;
            end
        end
        function [mu_y, dmuy_dx] = to_maximize_mean(model, theta, xtrain, ytrain, x, post)
            if any(isnan(x(:)))
                error('x is NaN')
            end
            [mu_y, ~, dmuy_dx] = model.prediction(theta, xtrain, ytrain, x, post);
            dmuy_dx = squeeze(dmuy_dx);
        end
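        % Note: to_maximize_mean packages the posterior mean and its gradient in a
        % form suitable for a gradient-based optimizer, e.g. when searching for the
        % maximizer of the posterior mean.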
        function [sample_f, dsample_f_dx] = draw_sample_GP(model, theta, xtrain, ytrain, approximation)
            % The decomposition corresponds to the prior and update terms in
            % the decoupled bases approximation.
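            % (This presumably follows the decoupled/pathwise sampling scheme of
            % Wilson et al., "Efficiently Sampling Functions from Gaussian Process
            % Posteriors", ICML 2020.)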
            if isempty(model.phi)
                % The feature approximation of the kernel has not been computed yet:
                % build it and cache it on the model.
                model = approximate_kernel(model, theta, approximation);
            end
            approximation.phi = model.phi;
            approximation.dphi_dx = model.dphi_dx;
            [sample_f, dsample_f_dx] = sample_GP_precomputed_features(xtrain, ytrain, theta, model, approximation);
        end
    end
end
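
% Example usage (a minimal sketch; the constructor arguments and the exact shape
% of theta are assumptions about the surrounding codebase, not defined in this file):
%
%   model = gp_regression_model(...);                                % hypothetical construction
%   [mu, s2] = model.prediction(theta, xtrain, ytrain, xtest, []);   % posterior mean and variance
%   [nll, dnll] = model.negloglike(theta, xtrain, ytrain);           % objective for fitting theta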