-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
107 lines (89 loc) · 4.02 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# */train.py
#
# PROGRAMMER: Meir Ofek
# DATE CREATED: Jun 14 2021
# REVISED DATE: Jun 30 2021
# PURPOSE: Train an image-classification network, test its accuracy, and
# save a checkpoint of the trained model. The program takes the
# following inputs (obtained via set_args):
# - The model name and its arguments, used to implement the training.
# - The number of hidden units argument, hidden_units.
# - The number of epochs, epochs.
# - GPU False / True (default True), for using the GPU for the training computation.
#
# The program train.py will use the following functions
# Imports python modules
from time import time, sleep
import torch
from args_model_train import set_args
from model_test import model_test
from config_model import config_model
from load_classes import load_classes
from load_images import load_images
from train_nn import train_nn
from save_cpt import save_cpt
def _format_elapsed(seconds):
    """Format a duration in seconds as an unpadded ``H:M:S`` string.

    Fractions of a second are truncated, e.g. ``3725.4`` -> ``"1:2:5"``.
    """
    hours = int(seconds / 3600)
    minutes = int((seconds % 3600) / 60)
    secs = int((seconds % 3600) % 60)
    return f"{hours}:{minutes}:{secs}"


def main():
    """Train a network, test its accuracy, and save a checkpoint.

    All settings are read from the object returned by ``set_args()``:
    learning rate, epochs, hidden units, architecture name, data directory,
    class-dictionary file, GPU flag, and checkpoint directory. Progress
    messages and elapsed times are printed to stdout.
    """
    start_time = time()
    in_args = set_args()

    print("********************************")
    print("Training network using the following parameters:")
    print(in_args)
    print("********************************")
    print()

    learning_rate = in_args.lrn_rate
    epochs = in_args.epochs
    hidden_units = in_args.hidden_units
    model_name = in_args.arch
    data_dir = in_args.dir
    class_dict = in_args.class_dict

    # Initialize device: CUDA only when it was requested AND is available.
    use_cuda = in_args.gpu and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("The model will run with device : {}".format(device))

    # The checkpoint file name is prefixed with the device it was trained on.
    checkpoint_dir = in_args.save_dir + "/" + str(device) + "_checkpoint.cpt"

    # Per-channel normalization statistics passed to the image loader.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    # The returned mapping is not used below; the call is kept so that any
    # side effects or validation performed by load_classes still happen.
    load_classes(file_path=class_dict)

    # Prepare the image data and transfer to Tensors with normalization
    print("\n Start loading and trasfrorm data ... ")
    # NOTE(review): resize=244 may be a typo for 224, the usual input size
    # for ImageNet-pretrained models - confirm against load_images.
    image_data, image_datasets = load_images(
        data_dir=data_dir,
        mean=mean,
        std=std,
        rotaion=30,
        resize=244,
        batch_size=64,
    )
    no_of_classes = len(image_datasets["training_sets"].class_to_idx)
    print(f"Number of classes = {no_of_classes}")

    # Prepare the model
    print("\n Setting teh model type and paramters ... ")
    model_param = config_model(
        trained_model=model_name,
        device=device,
        output=no_of_classes,
        drop_out=0.4,
        lrn_rate=learning_rate,
        hidden_units=hidden_units,
    )

    # Training the model
    print("\n Starting the Training proccess ...")
    train_nn(epochs, data_dict=image_data, device=device,
             model_param=model_param)
    # Measured from program start, so this also covers argument parsing,
    # data loading, and model configuration.
    train_time = time() - start_time
    start_time = time()
    print("\n Train proccess completed .")
    print("\n** Train Elapsed Runtime:", _format_elapsed(train_time))

    # Test network accuracy
    print("\n Starts the model accuracy testing ...")
    model_test(model_param=model_param, device=device, data_dict=image_data)
    test_time = time() - start_time
    print("\n Model accuracy testing completed ...")
    print("\n** Testing Elapsed Runtime:", _format_elapsed(test_time))

    # Save checkpoint
    print("\n Started the save_checkpoint ...")
    save_cpt(
        model_param=model_param,
        checkpoint_dir=checkpoint_dir,
        epochs=epochs,
        image_datasets=image_datasets,
        lrn_rate=learning_rate,
        model_name=model_name,
    )

    # The total is the sum of the two timed phases; the time spent saving
    # the checkpoint is not included.
    total_time = train_time + test_time
    print("\n** Total Elapsed Runtime:", _format_elapsed(total_time))
# Run the training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()