perceptual_loss_video.py
"""Perceptual loss for videos: per-sample L1 pixel distance plus L1 distances
between intermediate feature maps of a pretrained 3D ResNeXt/ResNet backbone."""
import torch
import torch.nn as nn
import easydict

from resnext import model


def get_args(depth):
    """Build the option set expected by model.generate_model() for the
    requested backbone depth (101 -> ResNeXt-101, 50 -> ResNet-50)."""
    if depth == 101:
        path = 'resnext-101-kinetics.pth'
        model_type = 'resnext'
    elif depth == 50:
        path = 'resnet-50-kinetics.pth'
        model_type = 'resnet'
    else:
        raise ValueError('unsupported model depth: {}'.format(depth))
    args = easydict.EasyDict(
        {
            "root_path": '/root/data/ActivityNet',
            "video_path": 'video_kinetics_jpg',
            "annotation_path": 'kinetics.json',
            "result_path": 'results',
            "dataset": 'kinetics',
            "n_classes": 400,
            "n_finetune_classes": 400,
            "sample_size": 64,
            "sample_duration": 32,
            "initial_scale": 1.0,
            "n_scales": 5,
            "scale_step": 0.84089641525,
            "train_crop": 'corner',
            "learning_rate": 0.1,
            "momentum": 0.9,
            "dampening": 0.9,
            "weight_decay": 1e-3,
            "mean_dataset": 'activitynet',
            "no_mean_norm": False,
            "std_norm": False,
            "nesterov": False,
            "optimizer": 'sgd',
            "lr_patience": 10,
            "batch_size": 32,
            "n_epochs": 100,
            "begin_epoch": 1,
            "n_val_samples": 3,
            "resume_path": '',
            "pretrain_path": path,
            "ft_begin_index": 0,
            "no_train": False,
            "no_val": False,
            "test": False,
            "test_subset": 'val',
            "scale_in_test": 1.0,
            "crop_position_in_test": 'c',
            "no_softmax_in_test": False,
            "no_cuda": False,
            "n_threads": 4,
            "checkpoint": 10,
            "no_hflip": False,
            "norm_value": 1,
            "model": model_type,
            "model_depth": depth,
            "resnet_shortcut": 'B',
            "wide_resnet_k": 2,
            "resnext_cardinality": 32,
            "manual_seed": 1
        }
    )
    return args


class _resnext_videoDistance(nn.Module):
    """Per-sample perceptual distance between two videos: L1 in pixel space
    plus L1 over intermediate feature maps of the pretrained backbone."""

    def __init__(self, depth):
        super(_resnext_videoDistance, self).__init__()
        self.resnext = _resnextFeatures(depth)

    def forward(self, video1, video2):
        batch_size = video1.size(0)
        f1 = self.resnext(video1)
        f2 = self.resnext(video2)
        # Per-sample L1 distance in pixel space (assumes CUDA is available).
        loss = torch.abs(video1 - video2).view(batch_size, -1).mean(1).cuda()
        # Accumulate per-sample L1 distances over feature maps 1..4.
        for i in range(1, 5):
            layer_loss = torch.abs(f1[i] - f2[i]).view(batch_size, -1).mean(1)
            loss = loss + layer_loss
        return loss


class _resnextFeatures(nn.Module):
    """Wraps the pretrained backbone and exposes intermediate feature maps."""

    def __init__(self, depth):
        super(_resnextFeatures, self).__init__()
        args = get_args(depth)
        self._resnext = model.generate_model(args)[0]

    def forward(self, video):
        # Returns a list of outputs from several intermediate layers of the
        # backbone; _resnext_videoDistance consumes indices 1..4.
        return self._resnext(video)
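

# --- Usage sketch (illustrative, not part of the original file) ---
# A minimal example of computing the perceptual distance between two clips,
# assuming the pretrained checkpoint (e.g. 'resnext-101-kinetics.pth') is on
# disk and a CUDA device is available. The input shape
# (batch, 3, sample_duration=32, sample_size=64, sample_size=64) follows the
# config in get_args(); random tensors stand in for real video batches.
if __name__ == '__main__':
    criterion = _resnext_videoDistance(depth=101).cuda()
    video1 = torch.rand(2, 3, 32, 64, 64).cuda()
    video2 = torch.rand(2, 3, 32, 64, 64).cuda()
    with torch.no_grad():
        dist = criterion(video1, video2)  # one distance per sample: shape (2,)
    print(dist)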