-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathevaluation.py
135 lines (100 loc) · 5.08 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#%%
import os, sys
from sklearn.model_selection import train_test_split
import random
import detectron2
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.engine import DefaultTrainer
from DataLoader import DataLoader
from MetricsVisualizer import MetricsVisualizer
# %%
root_dir = "./../Data" # change this to download to a specific location on your pc
# Reuse a single DataLoader instance instead of constructing a fresh one
# for every call.
loader = DataLoader()
loader.download_datasets(root_dir)
loader.download_trained_models(root_dir)
loader.generateAllJsonDataAnnotations(root_dir)
# %%
# to decide which data should be loaded use this:
type_of_annotation = ["system_measures"]
# type_of_annotation = ["stave_measures"]
# type_of_annotation = ["staves"]
# type_of_annotation = ["system_measures", "staves"]
# type_of_annotation = ["system_measures", "stave_measures", "staves"]
# The annotation names are already strings, so they can be joined directly.
json_pathname_extension = "-".join(type_of_annotation)
# %%
json_path = os.path.join(root_dir, "CVC_muscima_" + json_pathname_extension + ".json")
muscima_data = loader.load_from_json(json_path)
loader.show_data(muscima_data, type_of_annotation)
# %%
json_path = os.path.join(root_dir, "AudioLabs_" + json_pathname_extension + ".json")
audioLabs_data = loader.load_from_json(json_path)
def registerDataset(data_name, d, data, classes):
    """Register *data* in detectron2's DatasetCatalog under *data_name*.

    The lambda's default argument binds *d* at definition time (avoiding the
    late-binding closure pitfall); the dataset list itself is returned as-is
    by the catalog callable. The classes are attached as ``thing_classes``
    and the metadata object for *data_name* is returned.
    """
    DatasetCatalog.register(data_name, lambda d=d: data)
    meta = MetadataCatalog.get(data_name)
    meta.set(thing_classes=classes)
    return meta
# Put all pages for an augmentation into one set (training, test, validation):
# every music page must land in exactly one split together with all of its
# augmentations, otherwise augmented copies of the same page could end up in
# both the training and the test dataset (data-leak).
muscima_train, muscima_test, muscima_val = DataLoader().custom_muscima_split(muscima_data)
audiolabs_train, remainder = train_test_split(audioLabs_data, test_size=0.4, random_state=1)
audiolabs_test, audiolabs_val = train_test_split(remainder, test_size=0.5, random_state=1)

# Merge the per-source splits into the combined train/test/val datasets.
train_data = muscima_train + audiolabs_train
test_data = muscima_test + audiolabs_test
val_data = muscima_val + audiolabs_val

# Register each split with detectron2 under a fixed catalog name.
train_data_name = "train"
metadata = registerDataset(train_data_name, train_data_name, train_data, type_of_annotation)
test_data_name = "test"
registerDataset(test_data_name, test_data_name, test_data, type_of_annotation)
val_data_name = "val"
registerDataset(val_data_name, val_data_name, val_data, type_of_annotation)
# %%
def setup_cfg(train_data_name, test_data_name, num_classes, model_output_dir, cfg_file, existing_model_weight_path):
    """Build a detectron2 config for evaluating / resuming a trained model.

    Parameters
    ----------
    train_data_name : str
        Catalog name of the registered training dataset.
    test_data_name : str
        Catalog name of the registered test dataset.
    num_classes : int
        Number of ROI-head classes (one per annotation type).
    model_output_dir : str
        Directory for checkpoints and evaluation output.
    cfg_file : str
        Model-zoo config path (e.g. "COCO-Detection/faster_rcnn_...yaml").
    existing_model_weight_path : str
        Path to the checkpoint file to load weights from.

    Returns
    -------
    The populated detectron2 config node.
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(cfg_file))
    cfg.OUTPUT_DIR = model_output_dir
    cfg.DATASETS.TRAIN = (train_data_name,)
    cfg.DATASETS.TEST = (test_data_name,)
    # Multi-process data loading is broken on Windows
    # (see https://github.com/pytorch/pytorch/issues/2341), so workers must
    # be 0 there; every other platform (Linux, macOS, other unixes) can use
    # worker processes. This resolves the old "how about unix / mac?" TODO.
    cfg.DATALOADER.NUM_WORKERS = 0 if sys.platform.startswith("win") else 4
    cfg.MODEL.WEIGHTS = existing_model_weight_path
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512 # 128 faster, and good enough for toy dataset (default: 512)
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_classes
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2 # set the testing threshold for this model. Model should be at least 20% confident detection is correct
    cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.2
    # Set seed to negative to fully randomize everything.
    # Set seed to positive to use a fixed seed. Note that a fixed seed increases
    # reproducibility but does not guarantee fully deterministic behavior.
    # Disabling all parallelism further increases reproducibility.
    cfg.SEED = 1
    return cfg
# %%
network_type = "R_50_FPN_3x"
# network_type = "R_101_FPN_3x"
# network_type = "X_101_32x8d_FPN_3x"
model_dir = os.path.join(root_dir, "Models", network_type + "-" + json_pathname_extension)
cfg_file = "COCO-Detection/faster_rcnn_" + network_type + ".yaml"
# The "last_checkpoint" file holds the filename of the newest checkpoint.
# Use a context manager so the file handle is closed (the original
# open(...).read() leaked it) and strip surrounding whitespace in case the
# record ends with a newline, which would corrupt the joined path.
checkpoint_record = os.path.join(model_dir, "last_checkpoint")
with open(checkpoint_record, "r") as f:
    last_checkpoint = f.read().strip()
path_to_weight_file = os.path.join(model_dir, last_checkpoint)
cfg = setup_cfg(train_data_name, test_data_name, len(type_of_annotation), model_dir, cfg_file, path_to_weight_file)
#%%
# Restore the trainer from the checkpoint so its model carries the trained weights.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=True)
# %%
# Run COCO-style bounding-box evaluation over the test split.
tasks = ("bbox",)
evaluator = COCOEvaluator(test_data_name, tasks, False, output_dir=cfg.OUTPUT_DIR)
val_loader = build_detection_test_loader(cfg, test_data_name)
print(inference_on_dataset(trainer.model, val_loader, evaluator))
# %%
print("model has been trained for :", trainer.start_iter, "iterations")
#%%
MetricsVisualizer().visualizeMetrics(root_dir, network_type, type_of_annotation, start_plot_iter=300)
# %%