# ytvos_engine.py
import math
import sys
from typing import Iterable

import torch

import util.misc as utils


def train_one_epoch(model: torch.nn.Module, criterion: torch.nn.Module,
                    data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    device: torch.device, epoch: int, max_norm: float = 0):
    model.train()
    criterion.train()
    metric_logger = utils.MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)
    print_freq = 10

    for qsamples, qtargets, ssamples, stargets, _ in metric_logger.log_every(data_loader, print_freq, header):
        # Move the support-set batch to the device; captions are plain strings,
        # so they are pulled out before the targets are converted to tensors.
        ssamples = ssamples.to(device)
        scaptions = [t["caption"] for t in stargets]
        stargets = utils.targets_to(stargets, device)

        # Same handling for the query-set batch.
        qsamples = qsamples.to(device)
        q_captions = [t["caption"] for t in qtargets]
        qtargets = utils.targets_to(qtargets, device)

        # Forward pass takes both query and support inputs; the criterion is
        # evaluated against the query targets only. The total loss is the
        # weighted sum of the individual loss terms named in weight_dict.
        outputs = model(qsamples, q_captions, qtargets, ssamples, scaptions, stargets)
        loss_dict = criterion(outputs, qtargets)
        weight_dict = criterion.weight_dict
        losses = sum(loss_dict[k] * weight_dict[k] for k in loss_dict.keys() if k in weight_dict)

        # Reduce the loss terms across all processes for logging purposes.
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        loss_dict_reduced_unscaled = {f'{k}_unscaled': v
                                      for k, v in loss_dict_reduced.items()}
        loss_dict_reduced_scaled = {k: v * weight_dict[k]
                                    for k, v in loss_dict_reduced.items() if k in weight_dict}
        losses_reduced_scaled = sum(loss_dict_reduced_scaled.values())
        loss_value = losses_reduced_scaled.item()

        # Stop training if the loss has diverged (NaN/inf), printing the
        # per-term breakdown first to show which term blew up.
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        losses.backward()
        if max_norm > 0:
            grad_total_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)
        else:
            grad_total_norm = utils.get_total_grad_norm(model.parameters(), max_norm)
        optimizer.step()

        metric_logger.update(loss=loss_value, **loss_dict_reduced_scaled, **loss_dict_reduced_unscaled)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])
        metric_logger.update(grad_norm=grad_total_norm)

    # Gather the stats from all processes and return epoch-level averages.
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    return {k: meter.global_avg for k, meter in metric_logger.meters.items()}
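

# --- Usage sketch (an assumption, not part of the original file) -------------
# A minimal driver showing how train_one_epoch might be wired into a training
# loop. The model, criterion, and data loader are taken as arguments because
# this repo builds them elsewhere; the optimizer settings below are
# illustrative placeholders, not the repo's actual hyperparameters.
def _train_sketch(model, criterion, data_loader, num_epochs=1, lr=1e-4, max_norm=0.1):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    criterion.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4)
    for epoch in range(num_epochs):
        stats = train_one_epoch(model, criterion, data_loader, optimizer,
                                device, epoch, max_norm=max_norm)
        print('epoch', epoch, 'averaged stats:', stats)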