-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathlle.m
137 lines (125 loc) · 4.64 KB
/
lle.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
function [mappedX, mapping] = lle(X, no_dims, k, eig_impl)
%LLE Runs the locally linear embedding algorithm
%
% mappedX = lle(X, no_dims, k, eig_impl)
%
% Runs the local linear embedding algorithm on dataset X to reduces its
% dimensionality to no_dims. In the LLE algorithm, the number of neighbors
% can be specified by k.
% The function returns the embedded coordinates in mappedX.
%
%
% This file is part of the Matlab Toolbox for Dimensionality Reduction v0.7.1b.
% The toolbox can be obtained from http://homepage.tudelft.nl/19j49
% You are free to use, change, or redistribute this code in any way you
% want for non-commercial purposes. However, it is appreciated if you
% maintain the name of the original author.
%
% (C) Laurens van der Maaten, 2010
% University California, San Diego / Delft University of Technology
if ~exist('no_dims', 'var')
no_dims = 2;
end
if ~exist('k', 'var')
k = 12;
end
if ~exist('eig_impl', 'var')
eig_impl = 'Matlab';
end
% Get dimensionality and number of dimensions
[n, d] = size(X);
% Compute pairwise distances and find nearest neighbors (vectorized implementation)
disp('Finding nearest neighbors...');
if ~ischar(k)
[distance, neighborhood] = find_nn(X, k + 1);
else
[distance, neighborhood] = find_nn(X, k);
end
% Identify largest connected component of the neighborhood graph
blocks = components(distance)';
count = zeros(1, max(blocks));
for i=1:max(blocks)
count(i) = length(find(blocks == i));
end
[count, block_no] = max(count);
conn_comp = find(blocks == block_no);
% Update the neighborhood relations
tmp = 1:n;
tmp = tmp(conn_comp);
new_ind = zeros(n, 1);
for i=1:n
ii = find(tmp == i);
if ~isempty(ii), new_ind(i) = ii; end
end
neighborhood = neighborhood(conn_comp, 2:k+1)';
for i=1:n
neighborhood(neighborhood == i) = new_ind(i);
end
n = numel(conn_comp);
X = X(conn_comp,:)';
max_k = size(neighborhood, 1);
% Find reconstruction weights for all points by solving the MSE problem
% of reconstructing a point from each neighbours. A used constraint is
% that the sum of the reconstruction weights for a point should be 1.
disp('Compute reconstruction weights...');
if k > d
tol = 1e-5;
else
tol = 0;
end
% Construct reconstruction weight matrix
W = zeros(max_k, n);
for i=1:n
nbhd = neighborhood(:,i);
nbhd = nbhd(nbhd ~= 0);
kt = numel(nbhd);
z = bsxfun(@minus, X(:,nbhd), X(:,i)); % Shift point to origin
C = z' * z; % Compute local covariance
C = C + eye(kt, kt) * tol * trace(C); % Regularization of covariance (if K > D)
wi = C \ ones(kt, 1); % Solve linear system
wi = wi / sum(wi); % Make sure that sum is 1
W(:,i) = [wi; nan(max_k - kt, 1)];
end
% Now that we have the reconstruction weights matrix, we define the
% sparse cost matrix M = (I-W)'*(I-W).
M = sparse(1:n, 1:n, ones(1, n), n, n, 4 * max_k * n);
for i=1:n
w = W(:,i);
j = neighborhood(:,i);
indices = find(j ~= 0 & ~isnan(w));
j = j(indices);
w = w(indices);
M(i, j) = M(i, j) - w';
M(j, i) = M(j, i) - w;
M(j, j) = M(j, j) + w * w';
end
% For sparse datasets, we might end up with NaNs or Infs in M. We just set them to zero for now...
M(isnan(M)) = 0;
M(isinf(M)) = 0;
% The embedding is computed from the bottom eigenvectors of this cost matrix
disp('Compute embedding (solve eigenproblem)...');
tol = 0;
if strcmp(eig_impl, 'JDQR')
options.Disp = 0;
options.LSolver = 'bicgstab';
[mappedX, eigenvals] = jdqr(M, no_dims + 1, tol, options);
else
options.disp = 0;
options.isreal = 1;
options.issym = 1;
[mappedX, eigenvals] = eigs(M, no_dims + 1, tol, options); % only need bottom (no_dims + 1) eigenvectors
end
[eigenvals, ind] = sort(diag(eigenvals), 'ascend');
if size(mappedX, 2) < no_dims + 1
no_dims = size(mappedX, 2) - 1;
warning(['Target dimensionality reduced to ' num2str(no_dims) '...']);
end
eigenvals = eigenvals(2:no_dims + 1);
mappedX = mappedX(:,ind(2:no_dims + 1)); % throw away zero eigenvector/value
% Save information on the mapping
mapping.k = k;
mapping.X = X';
mapping.vec = mappedX;
mapping.val = eigenvals;
mapping.conn_comp = conn_comp;
mapping.nbhd = distance;