-
Notifications
You must be signed in to change notification settings - Fork 122
/
Copy pathdemo.m
199 lines (140 loc) · 8.04 KB
/
demo.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
% Author: Relja Arandjelovic ([email protected])
% This file contains a few examples of how to train and test CNNs for place recognition, refer to README.md for setup instructions, and to our project page for all relevant information (e.g. our paper): http://www.di.ens.fr/willow/research/netvlad/
% The code samples use the GPU by default, if you want to use the CPU instead (very slow especially for training!), add `'useGPU', false` to the affected function calls (trainWeakly, addPCA, serialAllFeats, computeRepresentation)
% For a tiny example of running the training on a small dataset, which takes only a few minutes to run, refer to the end of this file.
% Set the MATLAB paths
setup;
error('Don''t run this script, it is only meant as a collection of useful commands');
% ---------- Use/test our networks
% Load our network
netID= 'vd16_tokyoTM_conv5_3_vlad_preL2_intra_white';
paths= localPaths();
load( sprintf('%s%s.mat', paths.ourCNNs, netID), 'net' );
net= relja_simplenn_tidy(net);
% Compute the image representation by simply running the forward pass using
% the network `net` on the appropriately normalized image
% (see `computeRepresentation.m`).
im= vl_imreadjpeg({which('football.jpg')}); im= im{1}; % slightly convoluted because we need the full image path for `vl_imreadjpeg`, while `imread` is not appropriate - see `help computeRepresentation`
feats= computeRepresentation(net, im); % add `'useGPU', false` if you want to use the CPU
% To compute representations for many images, use the `serialAllFeats` function
% which is much faster as it uses batches and it moves the network to
% the GPU only once:
%
% serialAllFeats(net, imPath, imageFns, outputFn);
%
% `imageFns` is a cell array containing image file names relative to the `imPath` (i.e. `[imPath, imageFns{i}]` is a valid JPEG image), the representations are saved in binary format (single 4-byte floats). Batch size used for computing the forward pass can be changed by adding the `batchSize` parameter, e.g. `'batchSize', 10`. Note that if your input images are not all of same size (they are in place recognition datasets), you should set `batchSize` to 1.
% To test the network on a place recognition dataset, set up the test dataset
dbTest= dbTokyo247();
% Set the output filenames for the database/query image representations
paths= localPaths();
dbFeatFn= sprintf('%s%s_%s_db.bin', paths.outPrefix, netID, dbTest.name);
qFeatFn = sprintf('%s%s_%s_q.bin', paths.outPrefix, netID, dbTest.name);
% Compute db/query image representations
serialAllFeats(net, dbTest.dbPath, dbTest.dbImageFns, dbFeatFn, 'batchSize', 10); % adjust batchSize depending on your GPU / network size
serialAllFeats(net, dbTest.qPath, dbTest.qImageFns, qFeatFn, 'batchSize', 1); % Tokyo 24/7 query images have different resolutions so batchSize is constrained to 1
% Measure recall@N
[recall, ~, ~, opts]= testFromFn(dbTest, dbFeatFn, qFeatFn);
plot(opts.recallNs, recall, 'ro-'); grid on; xlabel('N'); ylabel('Recall@N');
% ---------- Full train and test example: Tokyo
% Train: Tokyo Time Machine, Test: Tokyo 24/7
% Set up the train/val datasets
dbTrain= dbTokyoTimeMachine('train');
dbVal= dbTokyoTimeMachine('val');
lr= 0.0001;
% --- Train the VGG-16 network + NetVLAD, tuning down to conv5_1
sessionID= trainWeakly(dbTrain, dbVal, ...
'netID', 'vd16', 'layerName', 'conv5_3', 'backPropToLayer', 'conv5_1', ...
'method', 'vlad_preL2_intra', ...
'learningRate', lr, ...
'doDraw', true);
% Get the best network
% This can be done even if training is not finished, it will find the best network so far
[~, bestNet]= pickBestNet(sessionID);
% Either use the above network as the image representation extractor (do: finalNet= bestNet), or do whitening (recommended):
finalNet= addPCA(bestNet, dbTrain, 'doWhite', true, 'pcaDim', 4096);
% --- Test
% Set up the test dataset
dbTest= dbTokyo247();
% Set the output filenames for the database/query image representations
paths= localPaths();
dbFeatFn= sprintf('%s%s_ep%06d_%s_db.bin', paths.outPrefix, finalNet.meta.sessionID, finalNet.meta.epoch, dbTest.name);
qFeatFn = sprintf('%s%s_ep%06d_%s_q.bin', paths.outPrefix, finalNet.meta.sessionID, finalNet.meta.epoch, dbTest.name);
% Compute db/query image representations
serialAllFeats(finalNet, dbTest.dbPath, dbTest.dbImageFns, dbFeatFn, 'batchSize', 10); % adjust batchSize depending on your GPU / network size
serialAllFeats(finalNet, dbTest.qPath, dbTest.qImageFns, qFeatFn, 'batchSize', 1); % Tokyo 24/7 query images have different resolutions so batchSize is constrained to 1
% Measure recall@N
[recall, ~, ~, opts]= testFromFn(dbTest, dbFeatFn, qFeatFn);
plot(opts.recallNs, recall, 'ro-'); grid on; xlabel('N'); ylabel('Recall@N');
% --- Test smaller dimensionalities:
% All that needs to be done (only valid for NetVLAD+whitening networks!)
% to reduce the dimensionality of the NetVLAD representation below 4096 to D
% is to keep the first D dimensions and L2-normalize.
% This is done automatically in `testFromFn` using the `cropToDim` option:
cropToDims= [64, 128, 256, 512, 1024, 2048, 4096];
recalls= [];
plotN= 5;
figure;
for iCropToDim= 1:length(cropToDims)
cropToDim= cropToDims(iCropToDim);
relja_display('D= %d', cropToDim);
[recall, ~, ~, opts]= testFromFn(dbTest, dbFeatFn, qFeatFn, [], 'cropToDim', cropToDim);
whichRecall= find(opts.recallNs==plotN);
recalls= [recalls, recall(whichRecall)];
hold off;
semilogx( cropToDims(1:iCropToDim), recalls, 'bo-');
set(gca, 'XTick', cropToDims(1:iCropToDim));
xlabel('Number of dimensions'); ylabel(sprintf('Recall@%d', plotN)); grid on;
drawnow;
end
% ---------- Pittsburgh examples
% Pitts250k or Pitts30k?
doPitts250k= false;
if doPitts250k
% Pittsburgh 250k
lr= 0.0001;
else
% Pittsburgh 30k
lr= 0.001;
end
dbTrain= dbPitts(doPitts250k, 'train');
dbVal= dbPitts(doPitts250k, 'val');
dbTest= dbPitts(doPitts250k, 'test');
% Now just run the same code as above for Tokyo
% ---------- Miscellaneous
% --- Other potentially useful commands for training
% caffe vlad: backprop down to conv2
sessionID= trainWeakly(dbTrain, dbVal, ...
'netID', 'caffe', 'layerName', 'conv5', 'backPropToLayer', 'conv2', ...
'method', 'vlad_preL2_intra', ...
'learningRate', lr, ...
'doDraw', true);
% vgg16 max: backprop down to conv5
sessionID= trainWeakly(dbTrain, dbVal, ...
'netID', 'vd16', 'layerName', 'conv5_3', 'backPropToLayer', 'conv5_1', ...
'method', 'max', ...
'learningRate', lr, ...
'doDraw', true);
% --- Constructing an off-the-shelf network:
dbTrain= dbPitts(false, 'train'); % or dbTokyoTimeMachine('train');
opts.netID= 'vd16'; % or 'caffe'
opts.layerName= 'conv5_3'; % or whatever layer you want, for caffe: 'conv5'
opts.method= 'vlad_preL2_intra'; % or 'max', 'avg'
net= loadNet(opts.netID, opts.layerName); % load the network and crop at desired layer
net= addLayers(net, opts, dbTrain); % add the NetVLAD/Max/Avg layer
net= addPCA(net, dbTrain, 'doWhite', true, 'pcaDim', 4096, 'batchSize', 10); % add whitening (for non-vlad methods pick a smaller pcaDim value)
% ---------- Tiny dummy training example
% This is just to see if you got all the dependencies and configurations set up correctly.
% Get a tiny version of the Tokyo Time Machine dataset from our research page ( www.di.ens.fr/willow/research/netvlad/ ) as well as the dataset specification. Point paths.dsetRootTokyoTM in localPaths.m to its location. Run the code below to train max pooling on top of AlexNet for this tiny dataset:
dbTrain= dbTiny('train'); dbVal= dbTiny('val');
trainWeakly(dbTrain, dbVal, ...
'netID', 'caffe', 'layerName', 'conv5', ...
'method', 'max', 'backPropToLayer', 'conv5', ...
'margin', 0.1, ...
'batchSize', 4, 'learningRate', 0.01, 'lrDownFreq', 3, 'momentum', 0.9, 'weightDecay', 0.1, 'compFeatsFrequency', 10, ...
'nNegChoice', 30, 'nNegCap', 10, 'nNegCache', 10, ...
'nEpoch', 10, ...
'epochTestFrequency', 1, 'test0', true, ...
'nTestSample', inf, 'nTestRankSample', 40, ...
'saveFrequency', 15, 'doDraw', true, ...
'useGPU', true, 'numThreads', 12, ...
'info', 'tiny test');