diff --git a/.gitignore b/.gitignore index 0c25f72..4cf6ddd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.tar -checkpoint.ipynb *-checkpoint.ipynb +script/ diff --git a/train.py b/train.py index 410e098..d089497 100644 --- a/train.py +++ b/train.py @@ -16,7 +16,7 @@ from utilities.utils import update_vis_plot from utilities.utils import visualize_GT -from utilities.dataloader_test import detection_collate +from utilities.dataloader import detection_collate import yolov1 viz = visdom.Visdom() @@ -54,9 +54,9 @@ DATASET_PATH_MARTIN_LAP = "/home/martin/Desktop/5class/_class_balance/" DATASET_PATH_MARTIN_COM = "/home/martin/Desktop/_class_balance/" DATASET_PATH_JAEWON = "D:\dataset\VOC2012" -# transforms.ToTensor() -train_dataset = VOC(root = DATASET_PATH_MARTIN_COM, - transform=None, cls_option = False, selective_cls=None) + + +train_dataset = VOC(root = DATASET_PATH_MARTIN_COM, transform=None) train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size = batch_size, @@ -64,6 +64,7 @@ collate_fn=detection_collate) +""" net = yolov1.YOLOv1() # visualize_weights_distribution(net) @@ -155,4 +156,6 @@ 'optimizer': optimizer.state_dict(), }, False, filename='checkpoint_{}.pth.tar'.format(epoch)) - f.close() \ No newline at end of file + f.close() + +""" \ No newline at end of file diff --git a/train_small.py b/train_small.py index c06fdce..dce0d90 100644 --- a/train_small.py +++ b/train_small.py @@ -1,5 +1,7 @@ import sys +import os +import argparse import warnings import shutil import numpy as np @@ -16,9 +18,9 @@ from PIL import Image from torchsummary.torchsummary import summary -from utilities import dataloader from utilities.dataloader import detection_collate from utilities.dataloader import VOC + from utilities.utils import save_checkpoint from utilities.utils import create_vis_plot from utilities.utils import update_vis_plot @@ -27,154 +29,208 @@ from imgaug import augmenters as iaa warnings.filterwarnings("ignore") - -plt.ion() # interactive mode - -viz = visdom.Visdom(use_incoming_socket=False) -vis_title = 'Yolo V1 Deepbaksu_vision (feat. martin, visionNoob) PyTorch on ' + 'VOC' -vis_legend = ['Train Loss'] -iter_plot = create_vis_plot(viz, 'Iteration', 'Total Loss', vis_title, vis_legend) -coord1_plot = create_vis_plot(viz, 'Iteration', 'coord1', vis_title, vis_legend) -size1_plot = create_vis_plot(viz, 'Iteration', 'size1', vis_title, vis_legend) -noobjectness1_plot = create_vis_plot(viz, 'Iteration', 'noobjectness1', vis_title, vis_legend) -objectness1_plot = create_vis_plot(viz, 'Iteration', 'objectness1', vis_title, vis_legend) -obj_cls_plot = create_vis_plot(viz, 'Iteration', 'obj_cls', vis_title, vis_legend) - -#1. Device configuration -device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') - -USE_SANITY_CHECK = False -USE_AUGMENTAION = False -num_epochs = 16000 -num_classes = 1 -batch_size = 15 -learning_rate = 1e-3 -dropout_prop = 0.5 - -DATASET_PATH_MARTIN = "/home/martin/Desktop/5class/_class_balance/" -DATASET_PATH_JAEWON = "H:\VOC\VOC12\VOCdevkit_2\VOC2012" -SMALL_DATASET_PATH = "H:/person-300" - -DATASET_PATH = SMALL_DATASET_PATH - -#2. 
Data augmentation setting
-if(USE_AUGMENTAION):
-    seq = iaa.SomeOf(2,[
-        iaa.Multiply((1.2, 1.5)), # change brightness, doesn't affect BBs
+#plt.ion() # interactive mode
+
+parser = argparse.ArgumentParser(description='YOLO v1.')
+parser.add_argument('--mode', type=str, help='train or test', default='train')
+parser.add_argument('--dataset', type=str, help='dataset to train on, voc', default='voc')
+parser.add_argument('--data_path', type=str, help='path to the data', required=True)
+parser.add_argument('--class_path', type=str, help='path to the filenames text file', required=True)
+parser.add_argument('--input_height', type=int, help='input height', default=448)
+parser.add_argument('--input_width', type=int, help='input width', default=448)
+parser.add_argument('--batch_size', type=int, help='batch size', default=15)
+parser.add_argument('--num_epochs', type=int, help='number of epochs', default=16000)
+parser.add_argument('--learning_rate', type=float, help='initial learning rate', default=1e-3)
+parser.add_argument('--dropout', type=float, help='dropout probability', default=0.5)
+parser.add_argument('--num_gpus', type=int, help='number of GPUs to use for training', default=1)
+parser.add_argument('--checkpoint_path', type=str, help='directory in which checkpoints are saved', default='./')
+
+# flags
+parser.add_argument('--use_augmentation', type=bool, help='use image augmentation', default=True)
+parser.add_argument('--use_visdom', type=bool, help='use the visdom board', default=True)
+parser.add_argument('--use_summary', type=bool, help='print a model summary', default=True)
+
+# develop
+parser.add_argument('--num_class', type=int, help='number of classes', default=5, required=True)
+args = parser.parse_args()
+
+def train(params):
+
+    # future work variables
+    dataset = params["dataset"]
+    input_height = params["input_height"]
+    input_width = params["input_width"]
+
+    data_path = params["data_path"]
+    class_path = params["class_path"]
+    batch_size = params["batch_size"]
+    num_epochs = params["num_epochs"]
+    learning_rate = params["lr"]
+    dropout = params["dropout"]
+    num_gpus = [i for i in range(params["num_gpus"])]
+    checkpoint_path = params["checkpoint_path"]
+
+    USE_VISDOM = params["use_visdom"]
+    USE_SUMMARY = params["use_summary"]
+    USE_AUGMENTATION = params["use_augmentation"]
+
+    num_class = params["num_class"]
+
+    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+
+    if USE_VISDOM:
+        viz = visdom.Visdom(use_incoming_socket=False)
+        vis_title = 'Yolo V1 Deepbaksu_vision (feat. martin, visionNoob) PyTorch on ' + 'VOC'
+        vis_legend = ['Train Loss']
+        iter_plot = create_vis_plot(viz, 'Iteration', 'Total Loss', vis_title, vis_legend)
+        coord1_plot = create_vis_plot(viz, 'Iteration', 'coord1', vis_title, vis_legend)
+        size1_plot = create_vis_plot(viz, 'Iteration', 'size1', vis_title, vis_legend)
+        noobjectness1_plot = create_vis_plot(viz, 'Iteration', 'noobjectness1', vis_title, vis_legend)
+        objectness1_plot = create_vis_plot(viz, 'Iteration', 'objectness1', vis_title, vis_legend)
+        obj_cls_plot = create_vis_plot(viz, 'Iteration', 'obj_cls', vis_title, vis_legend)
+
+    # 2. 
Data augmentation setting
+    if USE_AUGMENTATION:
+        seq = iaa.SomeOf(2, [
+            iaa.Multiply((1.2, 1.5)),  # change brightness, doesn't affect BBs
             iaa.Affine(
                 translate_px={"x": 3, "y": 10},
                 scale=(0.9, 0.9)
-            ), # translate by 40/60px on x/y axis, and scale to 50-70%, affects BBs
-        iaa.AdditiveGaussianNoise(scale=0.1*255),
+            ),  # translate by 3px/10px on x/y axis and scale to 90%, affects BBs
+            iaa.AdditiveGaussianNoise(scale=0.1 * 255),
             iaa.CoarseDropout(0.02, size_percent=0.15, per_channel=0.5),
             iaa.Affine(rotate=45),
             iaa.Sharpen(alpha=0.5)
         ])
-
-
-else:
-    seq = iaa.Sequential([])
-
-composed = transforms.Compose([Augmenter(seq)])
-
-#3. Load Dataset
-train_dataset = VOC(root = DATASET_PATH_MARTIN, transform=composed, class_path="names/5class.names")
-
-train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
-                                           batch_size = batch_size,
-                                           shuffle = True,
-                                           collate_fn=detection_collate)
-
-#4. Sanity Check for dataloader
-if(USE_SANITY_CHECK):
-    images, labels, size = iter(train_loader).next()
-    images = images.to(device)
-    labels = labels.to(device)
-    plt.imshow(np.transpose(images[0],(1,2,0)))
-
-#5. Load YOLOv1
-net = yolov1_small.SmallYOLOv1()
-use_cuda = torch.cuda.is_available()
-device = torch.device("cuda" if use_cuda else "cpu")
-model = yolov1_small.SmallYOLOv1().to(device)
-
-#6. Sanity Check for output dimention
-if(USE_SANITY_CHECK):
-    #for just a image
-    test_image = images[0]
-    outputs = model(torch.cuda.FloatTensor(np.expand_dims(test_image,axis=0)))
-    print(outputs.shape)
-
-    #for images (batch size)
-    outputs = model(torch.cuda.FloatTensor(images))
-    print(outputs.shape)
-
-# 7.Train the model
-#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=1e-5)
-optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
-scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
-
-# Train the model
-total_step = len(train_loader)
-for epoch in range(num_epochs):
-
-    if (epoch == 200) or (epoch == 400) or (epoch == 600) or (epoch == 20000) or (epoch == 30000):
-        scheduler.step()
-
-    for i, (images, labels, size) in enumerate(train_loader):
-
-
-        images = images.to(device)
-        labels = labels.to(device)
-
-        # Forward pass
-        outputs = model(images)
-
-        # Calc Loss
-        loss, \
-        obj_coord1_loss, \
-        obj_size1_loss, \
-        obj_class_loss, \
-        noobjness1_loss, \
-        objness1_loss = detection_loss_4_small_yolo(outputs, labels)
-        #objness2_loss = yolov1.detection_loss(outputs, labels)
-
-
-
-        # Backward and optimize
-        optimizer.zero_grad()
-        loss.backward()
-        optimizer.step()
-
-        if (i + 1) % 10 == 0:
-
-            print('Epoch ,[{}/{}] ,Step ,[{}/{}] ,lr ,{} ,total_loss ,{:.4f} ,coord1 ,{} ,size1 ,{} ,noobj_clss ,{} ,objness1 ,{} ,'
-                  .format(epoch + 1,
-                          num_epochs,
-                          i + 1,
-                          total_step,
-                          [param_group['lr'] for param_group in optimizer.param_groups],
-                          loss.item(),
-                          obj_coord1_loss,
-                          obj_size1_loss,
-                          obj_class_loss,
-                          noobjness1_loss,
-                          objness1_loss
-                          ))
-
-
-        update_vis_plot(viz, (epoch+1)*batch_size +(i + 1), loss.item(), iter_plot, None, 'append')
-        update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), obj_coord1_loss, coord1_plot, None, 'append')
-        update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), obj_size1_loss, size1_plot, None, 'append')
-        update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), obj_class_loss, obj_cls_plot, None, 'append')
-        update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), noobjness1_loss, noobjectness1_plot, None, 'append')
-        update_vis_plot(viz, (epoch + 1) * 
batch_size + (i + 1), objness1_loss, objectness1_plot, None, 'append')
-
-
-    if (epoch % 300) == 0:
-        save_checkpoint({
-            'epoch': epoch + 1,
-            'arch': "YOLOv1",
-            'state_dict': model.state_dict(),
-            'optimizer': optimizer.state_dict(),
-        }, False, filename='checkpoint_{}.pth.tar'.format(epoch))
+    else:
+        seq = iaa.Sequential([])
+
+    composed = transforms.Compose([Augmenter(seq)])
+
+    # 3. Load Dataset
+    # NOTE: the imgaug pipeline built above ('composed') is not wired in yet;
+    # images are currently only converted with transforms.ToTensor().
+    train_dataset = VOC(root=data_path, transform=transforms.ToTensor(), class_path=class_path)
+
+    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
+                                               batch_size=batch_size,
+                                               shuffle=True,
+                                               collate_fn=detection_collate)
+
+    # 4. Load YOLOv1
+    net = yolov1_small.SmallYOLOv1(params={"dropout" : dropout, "num_class" : num_class})
+    model = torch.nn.DataParallel(net, device_ids=num_gpus).cuda()
+
+    if USE_SUMMARY:
+        summary(model, (3, 448, 448))
+
+    # 5. Train the model
+    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)
+    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
+
+    total_step = len(train_loader)
+    for epoch in range(num_epochs):
+
+        if epoch in (200, 400, 600, 20000, 30000):
+            scheduler.step()
+
+        for i, (images, labels, sizes) in enumerate(train_loader):
+
+            images = images.to(device)
+            labels = labels.to(device)
+
+            # Forward pass
+            outputs = model(images)
+
+            # Calc Loss
+            loss, \
+            obj_coord1_loss, \
+            obj_size1_loss, \
+            obj_class_loss, \
+            noobjness1_loss, \
+            objness1_loss = detection_loss_4_small_yolo(outputs, labels)
+            # objness2_loss = yolov1.detection_loss(outputs, labels)
+
+            # Backward and optimize
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+            if (i + 1) % 10 == 0:
+                print(
+                    'Epoch ,[{}/{}] ,Step ,[{}/{}] ,lr ,{} ,total_loss ,{:.4f} ,coord1 ,{} ,size1 ,{} ,noobj_cls ,{} ,objness1 ,{} ,'
+                    .format(epoch + 1,
+                            num_epochs,
+                            i + 1,
+                            total_step,
+                            [param_group['lr'] for param_group in optimizer.param_groups],
+                            loss.item(),
+                            obj_coord1_loss,
+                            obj_size1_loss,
+                            obj_class_loss,
+                            noobjness1_loss,
+                            objness1_loss
+                            ))
+
+            if USE_VISDOM:
+                update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), loss.item(), iter_plot, None, 'append')
+                update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), obj_coord1_loss, coord1_plot, None, 'append')
+                update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), obj_size1_loss, size1_plot, None, 'append')
+                update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), obj_class_loss, obj_cls_plot, None, 'append')
+                update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), noobjness1_loss, noobjectness1_plot, None,
+                                'append')
+                update_vis_plot(viz, (epoch + 1) * batch_size + (i + 1), objness1_loss, objectness1_plot, None,
+                                'append')
+
+        if (epoch % 300) == 0:
+            save_checkpoint({
+                'epoch': epoch + 1,
+                'arch': "YOLOv1",
+                'state_dict': model.state_dict(),
+                'optimizer': optimizer.state_dict(),
+            }, False, filename=os.path.join(checkpoint_path, 'checkpoint_{}.pth.tar'.format(epoch)))
+
+def main():
+    params = {
+        "mode" : args.mode,
+        "dataset" : args.dataset,
+        "data_path" : args.data_path,
+        "class_path" : args.class_path,
+        "input_height" : args.input_height,
+        "input_width" : args.input_width,
+        "batch_size" : args.batch_size,
+        "num_epochs" : args.num_epochs,
+        "lr" : args.learning_rate,
+        "dropout" : args.dropout,
+        "num_gpus" : args.num_gpus,
+        "checkpoint_path" : 
args.checkpoint_path,
+
+        "use_visdom" : args.use_visdom,
+        "use_summary" : args.use_summary,
+        "use_augmentation" : args.use_augmentation,
+
+        "num_class" : args.num_class
+    }
+
+    if params["mode"] == "train":
+        train(params)
+    elif params["mode"] == "test":
+        # Future Work
+        pass
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/utilities/dataloader.py b/utilities/dataloader.py
index f51dc82..19cd657 100644
--- a/utilities/dataloader.py
+++ b/utilities/dataloader.py
@@ -3,13 +3,16 @@
 import torch
 import torch.utils.data as data
-import matplotlib.pyplot as plt
 import numpy as np
+
 from PIL import Image
 
 from convertYolo.Format import YOLO as cvtYOLO
 from convertYolo.Format import VOC as cvtVOC
 
+# develop: matplotlib is only needed for local debugging
+import matplotlib.pyplot as plt
+
 sys.path.insert(0, os.path.dirname(__file__))
 
 def detection_collate(batch):
@@ -19,7 +22,7 @@ def detection_collate(batch):
     batch : batch data
     ``batch[0]`` : image, ``batch[1]`` : label, ``batch[3]`` : size
     Return:
-        image tensor, label tensor, sizes
+        image tensor, label tensor, sizes (original image sizes before resize)
 
     Future work:
         return value(torch.stack) change to Torch.FloatTensor()
@@ -57,6 +60,7 @@ def detection_collate(batch):
             # [objectness, class, x offset, y offset, width ratio, height ratio]
             np_label[grid_x_index][grid_y_index] = np.array([objectness, cls, x_offset, y_offset, w_ratio, h_ratio])
 
+
         label = torch.from_numpy(np_label)
         targets.append(label)
@@ -89,6 +93,7 @@ def __init__(self, root, train=True, transform=None, target_transform=None, resi
         self.target_transform = target_transform
         self.train = train
         self.resize_factor = resize
+        self.class_path = class_path
 
         with open(class_path) as f:
             self.classes = f.read().splitlines()
@@ -99,7 +104,6 @@ def __init__(self, root, train=True, transform=None, target_transform=None, resi
         self.data = self.cvtData()
 
     def _check_exists(self):
-
         print("Image Folder : {}".format(os.path.join(self.root, self.IMAGE_FOLDER)))
         print("Label Folder : {}".format(os.path.join(self.root, self.LABEL_FOLDER)))
@@ -110,7 +114,8 @@ def cvtData(self):
         result = []
 
         voc = cvtVOC()
-        yolo = cvtYOLO(os.path.abspath(self.classes))
+
+        yolo = cvtYOLO(os.path.abspath(self.class_path))
         flag, self.dict_data =voc.parse(os.path.join(self.root, self.LABEL_FOLDER))
 
         try:
@@ -152,31 +157,28 @@ def __getitem__(self, index):
             tuple: Tuple(image, target). 
target is the object returned by YOLO annotation as
                [
                    [
                        class,
                        x of center point,
                        y of center point,
                        width represented ratio of image width,
                        height represented ratio of image height
                    ]
                ]
+        """
         key = list(self.data[index].keys())[0]
+
         img = Image.open(key).convert('RGB')
         current_shape = img.size
         img = img.resize((self.resize_factor, self.resize_factor))
-        img = np.array(img.getdata(), dtype=np.float).reshape(img.size[0], img.size[1], 3)
+
         target = self.data[index][key]
 
         if self.transform is not None:
-            print("It's not supported, transform parameter should be None")
-            exit()
-
-        else:
-            img = torch.FloatTensor(img)
-            img = torch.div(img, 255)
+            img = self.transform(img)
 
         if self.target_transform is not None:
             # Future works
             pass
-
+
         return img, target, current_shape
\ No newline at end of file
diff --git a/yolov1_small.py b/yolov1_small.py
index fbcc9ff..fcfd333 100644
--- a/yolov1_small.py
+++ b/yolov1_small.py
@@ -1,4 +1,5 @@
 import shutil
+
 import torch
 import torch.nn as nn
 import torchvision
@@ -13,15 +14,13 @@
 import visdom
 
-# Hyper parameters
-num_epochs = 16000
-num_classes = 1
-batch_size = 64
-learning_rate = 5e-5
-dropout_prop = 0.5
 
 class SmallYOLOv1(nn.Module):
-    def __init__(self):
+    def __init__(self, params):
+
+        self.dropout_prop = params["dropout"]
+        self.num_classes = params["num_class"]
+
         super(SmallYOLOv1, self).__init__()
         # LAYER 1
         self.layer1 = nn.Sequential(
@@ -138,15 +137,14 @@ def __init__(self):
         self.fc1 = nn.Sequential(
             nn.Linear(7*7*1024, 4096),
             nn.LeakyReLU(),
-            nn.Dropout(dropout_prop)
+            nn.Dropout(self.dropout_prop)
         )
 
         self.fc2 = nn.Sequential(
-            nn.Linear(4096, 7*7*((5)+num_classes)),
-            nn.Dropout(dropout_prop),
+            nn.Linear(4096, 7*7*(5 + self.num_classes)),
+            nn.Dropout(self.dropout_prop),
         )
 
-
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
@@ -184,7 +182,7 @@ def forward(self, x):
         out = out.reshape(out.size(0), -1)
         out = self.fc1(out)
         out = self.fc2(out)
-        out = out.reshape((-1, 7, 7, 5 + self.num_classes))
+        out = out.reshape((-1, 7, 7, 5 + self.num_classes))
         return out
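
Usage note (not part of the diff): with the argparse interface added to train_small.py, training is launched from the command line. --data_path, --class_path, and --num_class are required; everything else falls back to its default (448x448 input, batch size 15, learning rate 1e-3, checkpoints saved to ./). With placeholder paths, a run looks like: python train_small.py --mode train --data_path /path/to/dataset --class_path names/5class.names --num_class 5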
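
A caveat on the new boolean flags: argparse's type=bool applies bool() to the raw string, and any non-empty string is truthy, so --use_visdom False still evaluates to True. If explicit on/off parsing is wanted, a tiny converter is the usual workaround; str2bool below is a hypothetical helper, not something this patch adds:

    def str2bool(v):
        # argparse passes the raw command-line token as a string;
        # map the common "true" spellings to True, everything else to False
        return str(v).lower() in ('yes', 'true', 't', '1')

    # e.g. parser.add_argument('--use_visdom', type=str2bool, default=True, help='use the visdom board')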
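
As a quick smoke test of the parameterized model (a sketch, not part of the diff): SmallYOLOv1 now reads its dropout probability and class count from a params dict, and forward() reshapes the output to (N, 7, 7, 5 + num_class), i.e. five box terms plus the class scores per grid cell. Assuming the layers between the hunks shown here produce the 7x7x1024 feature map that fc1 expects, the following should run on CPU:

    import torch
    import yolov1_small

    # Same params dict shape that train() passes in.
    net = yolov1_small.SmallYOLOv1(params={"dropout": 0.5, "num_class": 5})
    dummy = torch.randn(2, 3, 448, 448)   # two 448x448 RGB images
    out = net(dummy)
    print(out.shape)                      # expected: torch.Size([2, 7, 7, 10])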