Test_OxfordParis.py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import numpy as np
import os
from os.path import join
import shutil
from sklearn.preprocessing import normalize
from collections import OrderedDict
import Resnet
from tqdm import tqdm
import cv2
import argparse
import subprocess
from Common import get_rmac_region_coordinates
from Common import pack_regions_for_network
from Common import L2Normalization
from Common import Shift
from Common import RoIPool
import warnings
warnings.simplefilter("ignore", DeprecationWarning)
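# A brief note on the attention module below (inferred from its forward pass): for each
# R-MAC region it mean-pools the regional feature map and the whole feature map,
# concatenates the two 2048-d vectors into a 4096-d context-aware descriptor, and feeds
# it through a small 1x1-conv network (conv -> tanh -> conv -> softplus) to obtain one
# non-negative attention weight per region.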
class ContextAwareRegionalAttentionNetwork(nn.Module):
def __init__(self, spatial_scale, pooled_height = 1, pooled_width = 1):
super(ContextAwareRegionalAttentionNetwork, self).__init__()
self.pooled_width = int(pooled_width)
self.pooled_height = int(pooled_height)
self.spatial_scale = float(spatial_scale)
self.conv_att_1 = nn.Conv1d(4096, 64, 1, padding=0)
self.tanh = nn.Tanh()
self.conv_att_2 = nn.Conv1d(64, 1, 1, padding=0)
self.sp_att_2 = nn.Softplus()
def forward(self, features, rois):
batch_size, num_channels, data_height, data_width = features.size()
num_rois = rois.size()[0]
outputs = Variable(torch.zeros(num_rois, num_channels * 2,
self.pooled_height,
self.pooled_width))
if features.is_cuda:
outputs = outputs.cuda(torch.cuda.device_of(features).idx)
        # Based on the PyTorch RoI pooling code; the only difference is that max pooling is replaced with mean pooling
for roi_ind, roi in enumerate(rois):
batch_ind = int(roi[0].data[0])
            roi_start_w, roi_start_h, roi_end_w, roi_end_h = torch.round(
                roi[1:] * self.spatial_scale).data.cpu().numpy().astype(int)
roi_width = max(roi_end_w - roi_start_w + 1, 1)
roi_height = max(roi_end_h - roi_start_h + 1, 1)
bin_size_w = float(roi_width) / float(self.pooled_width)
bin_size_h = float(roi_height) / float(self.pooled_height)
for ph in range(self.pooled_height):
hstart = int(np.floor(ph * bin_size_h))
hend = int(np.ceil((ph + 1) * bin_size_h))
hstart = min(data_height, max(0, hstart + roi_start_h))
hend = min(data_height, max(0, hend + roi_start_h))
for pw in range(self.pooled_width):
wstart = int(np.floor(pw * bin_size_w))
wend = int(np.ceil((pw + 1) * bin_size_w))
wstart = min(data_width, max(0, wstart + roi_start_w))
wend = min(data_width, max(0, wend + roi_start_w))
is_empty = (hend <= hstart) or (wend <= wstart)
if is_empty:
outputs[roi_ind, :, ph, pw] = 0
else:
data = features[batch_ind]
                        # mean pooling over both the regional feature map and the global feature map
outputs[roi_ind, :, ph, pw] = torch.cat((torch.mean(
torch.mean(data[:, hstart:hend, wstart:wend], 1, keepdim=True), 2, keepdim=True).view(-1)
, torch.mean(
torch.mean(data, 1, keepdim=True), 2, keepdim=True).view(-1)), 0) # noqa
        # Reshape
outputs = outputs.squeeze(2).squeeze(2)
outputs = outputs.transpose(0, 1).unsqueeze(0) # (1, # channel, #batch * # regions)
#Calculate regional attention weights with context-aware regional feature vectors
k = self.tanh(self.conv_att_1(outputs))
k = self.sp_att_2(self.conv_att_2(k)) # (1, 1, #batch * # regions)
k = torch.squeeze(k, 1)
return k
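# OurNet evaluation pipeline (as implemented in forward() below):
#   1. sample R-MAC regions on the input image and extract a ResNet-101 feature map,
#   2. RoI-max-pool each region, apply PCA whitening (shift + linear layer), and L2-normalize,
#   3. weight each regional descriptor by its context-aware attention score,
#   4. average the weighted regional descriptors per image and L2-normalize the result.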
class OurNet(nn.Module):
def __init__(self):
super(OurNet, self).__init__()
self.l2norm = L2Normalization()
#RoI max pooling
self.r_mac_pool = RoIPool(1, 1, 0.03125)
#Define regional attention network
self.region_attention = ContextAwareRegionalAttentionNetwork(spatial_scale = 0.03125)
self.pca_shift = Shift(2048)
self.pca_fc = nn.Linear(2048, 2048, bias=False)
#Load the PCA weights learned with off-the-shelf Resnet101
self.pca_fc.weight.data = torch.Tensor(np.load('weights/pca_components.npy'))
self.pca_shift.bias.data = torch.Tensor(np.load('weights/pca_mean.npy'))
self.resnet = Resnet.resnet101(pretrained=False)
        #Load the Caffe version of the off-the-shelf ResNet-101.
dic = torch.load('weights/resnet101_caffeProto.pth', map_location=lambda storage, loc: storage)
self.resnet.load_state_dict(dic, strict=False)
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.xavier_normal(m.weight.data)
if m.bias is not None:
m.bias.data.zero_()
if isinstance(m, nn.Linear):
nn.init.xavier_normal(m.weight.data)
if m.bias is not None:
m.bias.data.zero_()
    def forward(self, x):
#Calculate R-MAC regions (Region sampling)
batched_rois = [get_rmac_region_coordinates(x.shape[2], x.shape[3], 5) for i in range(x.shape[0])]
region_size = len(batched_rois[0])
rois = Variable(torch.FloatTensor(pack_regions_for_network(batched_rois)))
h = x
#Extract feature map
h = self.resnet(x) # ( #batch, #channel, #h, #w)
#R-MAC module
g = self.r_mac_pool(h, rois)
g = g.squeeze(2).squeeze(2) # (#batch * # regions, #channel)
g = self.pca_shift(g) # PCA
g = self.pca_fc(g)
g = self.l2norm(g) # normalize each region
#Regional attention module
g2 = self.region_attention(h, rois)
g2 = g2.squeeze(0).squeeze(0) # (# batch * region)
#Weighted mean
g = torch.mul(g.transpose(1, 0), g2).transpose(1, 0) # regional weighted feature (# batch * region, #channel)
g = g.contiguous()
g = g.view(torch.Size([h.size(0), -1, h.size(1)])) # (#batch, # region, # channel)
        g = torch.transpose(g, 1, 2) # (#batch, #channel, #region)
g = torch.mean(g, 2)
#Final L2
g = self.l2norm(g)
return g
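# ImageHelper resizes each image so that its largest side equals S, subtracts the
# per-channel means (cv2 loads BGR; the values match the usual Caffe BGR means), and
# runs the network on one image at a time.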
class ImageHelper:
def __init__(self, S, means):
self.S = S
self.means = means
def get_features(self, I, net, gpu_num):
output = net(Variable(torch.from_numpy(I).cuda(gpu_num)))
output = np.squeeze(output.cpu().data.numpy())
return output
def load_and_prepare_image(self, fname, roi=None):
        # Read the image, get its aspect ratio, and resize so that the largest side equals S
im = cv2.imread(fname)
im_size_hw = np.array(im.shape[0:2])
ratio = float(self.S) / np.max(im_size_hw)
new_size = tuple(np.round(im_size_hw * ratio).astype(np.int32))
im_resized = cv2.resize(im, (new_size[1], new_size[0]))
# If there is a roi, adapt the roi to the new size and crop. Do not rescale
# the image once again
if roi is not None:
roi = np.round(roi * ratio).astype(np.int32)
im_resized = im_resized[roi[1]:roi[3], roi[0]:roi[2], :]
# Transpose for network and subtract mean
I = im_resized.transpose(2, 0, 1) - self.means
return I
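# Dataset implements the standard Oxford5k / Paris6k evaluation protocol: it parses the
# *_query.txt / *_ok.txt / *_good.txt / *_junk.txt ground-truth files, keeps each query ROI,
# and scores a similarity matrix by writing per-query ranked lists and calling the official
# compute_ap binary to obtain per-query AP and the mean AP.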
class Dataset:
    # This code is from Gordo et al. (https://github.com/figitaki/deep-retrieval), slightly modified.
def __init__(self, path, eval_binary_path):
self.path = path
self.eval_binary_path = eval_binary_path
# Some images from the Paris dataset are corrupted. Standard practice is
# to ignore them
self.blacklisted = set(["paris_louvre_000136",
"paris_louvre_000146",
"paris_moulinrouge_000422",
"paris_museedorsay_001059",
"paris_notredame_000188",
"paris_pantheon_000284",
"paris_pantheon_000960",
"paris_pantheon_000974",
"paris_pompidou_000195",
"paris_pompidou_000196",
"paris_pompidou_000201",
"paris_pompidou_000467",
"paris_pompidou_000640",
"paris_sacrecoeur_000299",
"paris_sacrecoeur_000330",
"paris_sacrecoeur_000353",
"paris_triomphe_000662",
"paris_triomphe_000833",
"paris_triomphe_000863",
"paris_triomphe_000867"])
self.load()
def load(self):
# Load the dataset GT
self.lab_root = '{0}/lab/'.format(self.path)
self.img_root = '{0}/jpg/'.format(self.path)
lab_filenames = np.sort(os.listdir(self.lab_root))
# Get the filenames without the extension
self.img_filenames = [e[:-4] for e in np.sort(os.listdir(self.img_root)) if e[:-4] not in self.blacklisted]
self.relevants = {}
self.junk = {}
self.non_relevants = {}
self.filename_to_name = {}
self.name_to_filename = OrderedDict()
self.q_roi = {}
for e in lab_filenames:
if e.endswith('_query.txt'):
q_name = e[:-len('_query.txt')]
                q_data = open("{0}/{1}".format(self.lab_root, e)).readline().split(" ")
q_filename = q_data[0][5:] if q_data[0].startswith('oxc1_') else q_data[0]
self.filename_to_name[q_filename] = q_name
self.name_to_filename[q_name] = q_filename
                good = set([e.strip() for e in open("{0}/{1}_ok.txt".format(self.lab_root, q_name))])
                good = good.union(set([e.strip() for e in open("{0}/{1}_good.txt".format(self.lab_root, q_name))]))
                junk = set([e.strip() for e in open("{0}/{1}_junk.txt".format(self.lab_root, q_name))])
good_plus_junk = good.union(junk)
self.relevants[q_name] = [i for i in range(len(self.img_filenames)) if
self.img_filenames[i] in good]
self.junk[q_name] = [i for i in range(len(self.img_filenames)) if self.img_filenames[i] in junk]
self.non_relevants[q_name] = [i for i in range(len(self.img_filenames)) if
self.img_filenames[i] not in good_plus_junk]
                self.q_roi[q_name] = np.array(list(map(float, q_data[1:])), dtype=np.float32)
        self.q_names = list(self.name_to_filename.keys())
self.q_index = np.array([self.img_filenames.index(self.name_to_filename[qn]) for qn in self.q_names])
self.N_images = len(self.img_filenames)
self.N_queries = len(self.q_index)
def score(self, sim, temp_dir, eval_bin):
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
idx = np.argsort(sim, axis=1)[:, ::-1]
maps = [self.score_rnk_partial(i, idx[i], temp_dir, eval_bin) for i in range(len(self.q_names))]
for i in range(len(self.q_names)):
print("{0}: {1:.2f}".format(self.q_names[i], 100 * maps[i]))
print(20 * "-")
print("Mean: {0:.1f}".format(round(100 * np.mean(maps), 1)))
# print ("Mean: {0:.2f}".format(100 * np.mean(maps)))
def score_rnk_partial(self, i, idx, temp_dir, eval_bin):
rnk = np.array(self.img_filenames)[idx]
with open("{0}/{1}.rnk".format(temp_dir, self.q_names[i]), 'w') as f:
f.write("\n".join(rnk) + "\n")
cmd = "{0} {1}{2} {3}/{4}.rnk".format(eval_bin, self.lab_root, self.q_names[i], temp_dir, self.q_names[i])
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
map_ = float(p.stdout.readlines()[0])
p.wait()
return map_
def get_filename(self, i):
return os.path.normpath("{0}/{1}.jpg".format(self.img_root, self.img_filenames[i]))
def get_query_filename(self, i):
return os.path.normpath("{0}/{1}.jpg".format(self.img_root, self.img_filenames[self.q_index[i]]))
def get_query_roi(self, i):
return self.q_roi[self.q_names[i]]
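# extract_features runs the network once per query image (cropped to its ROI) and once per
# database image, returning two matrices of L2-normalized 2048-d global descriptors.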
def extract_features(dataset, image_helper, net, gpu_num):
dim_features = 2048
N_queries = dataset.N_queries
features_queries = np.zeros((N_queries, dim_features), dtype=np.float32)
for i in tqdm(range(N_queries)):
# Extract features for queries
I = image_helper.load_and_prepare_image(dataset.get_query_filename(i),roi=dataset.get_query_roi(i))
features_queries[i] = image_helper.get_features(I, net, gpu_num)
dim_features = 2048
N_dataset = dataset.N_images
features_dataset = np.zeros((N_dataset, dim_features), dtype=np.float32)
for i in tqdm(range(N_dataset)):
# Extract features for dataset
I = image_helper.load_and_prepare_image(dataset.get_filename(i), roi=None)
features_dataset[i] = image_helper.get_features(I, net, gpu_num)
return features_queries, features_dataset
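# Example invocation (a sketch; it assumes the datasets/ and weights/ layout used below and
# that the dataset folder name matches, e.g. "Oxford" or "Paris"):
#   python Test_OxfordParis.py --dataset Oxford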
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Evaluate Oxford / Paris')
    parser.add_argument('--dataset', type=str, required=True, help='Dataset to evaluate: Oxford or Paris')
parser.set_defaults(multires=False)
args = parser.parse_args()
gpu_num = 0
S = 1024 #Maximum dimension
dataset_path = 'datasets/' + args.dataset
eval_binary = 'datasets/evaluation/compute_ap'
temp_dir = 'temp'
weight_path = 'weights/ContextAwareRegionalAttention_weights.pth'
means = np.array([103.93900299, 116.77899933, 123.68000031], dtype=np.float32)[None, :, None, None]
net = OurNet()
net.eval()
net = net.cuda(gpu_num)
#Load the trained weights of regional attention network
dic = torch.load(weight_path)
net.region_attention.load_state_dict(dic, strict=True)
shutil.rmtree(temp_dir, ignore_errors=True)
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
# Load the dataset and the image helper
dataset = Dataset(dataset_path, eval_binary)
image_helper = ImageHelper(S, means)
# Extract features
features_queries, features_dataset = extract_features(dataset, image_helper, net, gpu_num)
    # Compute similarity (dot product of L2-normalized descriptors, i.e. cosine similarity)
sim = features_queries.dot(features_dataset.T)
dataset.score(sim, temp_dir, eval_binary)
# query expansion with top-5 results
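    # Each query descriptor is averaged with the descriptors of its top-5 retrieved images,
    # the result is re-L2-normalized, and the database is ranked and scored again.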
print("Query expansion...")
top_k = 5
idx = np.argsort(sim, axis=1)[:, ::-1]
qe_features_queries = normalize(np.vstack(
[np.vstack((features_queries[i], features_dataset[idx[i, :top_k]])).mean(axis=0) for i in
range(len(features_queries))]))
sim = qe_features_queries.dot(features_dataset.T)
dataset.score(sim, temp_dir, eval_binary)