-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathmain.py
148 lines (126 loc) · 5.39 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import argparse, os
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
from model import NonBinaryNet, BinaryNet
# Training settings
# Command-line options as (flag, add_argument keyword arguments), kept in the
# order in which they are registered with the parser.
_CLI_OPTIONS = (
    ('--binary', dict(
        action='store_true', default=False,
        help='Use binary activations instead of float')),
    ('--stochastic', dict(
        action='store_true', default=False,
        help='Use stochastic activations instead of deterministic [active iff `--binary`]')),
    ('--reinforce', dict(
        action='store_true', default=False,
        help='Use REINFORCE Estimator instead of Straight Through Estimator [active iff `--binary`]')),
    ('--slope-annealing', dict(
        action='store_true', default=False,
        help='Use slope annealing trick')),
    ('--batch-size', dict(
        type=int, default=64, metavar='N',
        help='input batch size for training (default: 64)')),
    ('--epochs', dict(
        type=int, default=100, metavar='N',
        help='number of epochs to train (default: 100)')),
    ('--lr', dict(
        type=float, default=0.1, metavar='LR',
        help='learning rate (default: 0.1)')),
    ('--momentum', dict(
        type=float, default=0.9, metavar='M',
        help='SGD momentum (default: 0.9)')),
    ('--no-cuda', dict(
        action='store_true', default=False,
        help='disables CUDA training')),
    ('--seed', dict(
        type=int, default=1, metavar='S',
        help='random seed (default: 1)')),
)

parser = argparse.ArgumentParser(description='Binary Neurons')
for _option_flag, _option_spec in _CLI_OPTIONS:
    parser.add_argument(_option_flag, **_option_spec)
args = parser.parse_args()
# Model, activation type, estimator type
if not args.binary:
    # Float activations: no activation mode or estimator is selected.
    mode = None
    estimator = None
    model = NonBinaryNet()
else:
    # Binary activations: the remaining flags pick how units fire and which
    # gradient estimator name is handed to the network.
    mode = 'Stochastic' if args.stochastic else 'Deterministic'
    estimator = 'REINFORCE' if args.reinforce else 'ST'
    model = BinaryNet(mode=mode, estimator=estimator)
# Cuda
# CUDA availability is only queried when the user has not passed --no-cuda.
if args.no_cuda:
    args.cuda = False
else:
    args.cuda = torch.cuda.is_available()

# Seed the default generator, and the CUDA one when the GPU is in use.
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    model.cuda()
# Dataset
# Extra DataLoader options are passed only when running on the GPU.
if args.cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}


def _mnist_transform():
    """Build the preprocessing pipeline applied to both MNIST splits."""
    return transforms.Compose([
        transforms.ToTensor(),
        # NOTE(review): constants are presumably the MNIST mean / std - confirm.
        transforms.Normalize((0.1307,), (0.3081,)),
    ])


# Only the training split requests a download; both read from ./data.
_train_set = datasets.MNIST('./data', train=True, download=True,
                            transform=_mnist_transform())
_test_set = datasets.MNIST('./data', train=False,
                           transform=_mnist_transform())

train_loader = torch.utils.data.DataLoader(
    _train_set, batch_size=args.batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    _test_set, batch_size=args.batch_size, shuffle=True, **kwargs)

optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
# Slope annealing
# The schedule is chosen once, when the script starts, from --slope-annealing.
if args.slope_annealing:
    def get_slope(epoch):
        """Return the slope for `epoch`: 1.0 at epoch 1, multiplied by 1.005 per later epoch."""
        return 1.0 * (1.005 ** (epoch - 1))
else:
    def get_slope(epoch):
        """Return a constant slope of 1.0 regardless of `epoch`."""
        return 1.0
# Training procedure
def train(epoch):
    """Run one optimisation pass over `train_loader` and report the mean loss.

    Args:
        epoch: Epoch number. It is passed to `get_slope` to obtain the slope
            value that is handed to the model together with every batch.

    Returns:
        The NLL loss averaged over the batches of this epoch, as a Python
        float.
    """
    slope = get_slope(epoch)
    # A parenthesised single-argument print works as a Python 2 statement and
    # as a Python 3 function call alike.
    print('# Epoch : {} - Slope : {}'.format(epoch, slope))
    model.train()

    train_loss = 0.0
    for data, target in train_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)

        optimizer.zero_grad()
        # The model receives the batch and the current slope as one tuple.
        output = model((data, slope))
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # float() converts a one-element as well as a zero-dimensional tensor,
        # so no `[0]` indexing (which fails on zero-dimensional losses) is needed.
        train_loss += float(loss.data)

    train_loss /= len(train_loader)
    print('Training Loss : {}'.format(train_loss))
    return train_loss
# Testing procedure
def test(epoch, best_acc):
    """Evaluate the model on `test_loader` and checkpoint it when it does not regress.

    Args:
        epoch: Epoch number, used to look up the slope via `get_slope`.
        best_acc: Best test accuracy seen so far. The model weights are saved
            when the accuracy measured here is greater than or equal to it.

    Returns:
        A `(test_loss, test_acc)` tuple: the summed NLL loss divided by the
        number of test samples, and the fraction of correct predictions.
    """
    slope = get_slope(epoch)
    model.eval()

    test_loss = 0.0
    # Kept as a float so the accuracy division below is never integer division.
    correct = 0.0
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model((data, slope))
        # Sum up the batch loss; float() handles one-element and
        # zero-dimensional tensors alike, unlike `.data[0]`.
        test_loss += float(F.nll_loss(output, target, size_average=False).data)
        # Index of the max log-probability.
        pred = output.data.max(1, keepdim=True)[1]
        # int() keeps the running count a plain Python number whether sum()
        # yields an int or a tensor.
        correct += int(pred.eq(target.data.view_as(pred)).cpu().sum())

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    test_acc = correct / num_samples
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, int(correct), num_samples, 100. * test_acc))

    if test_acc >= best_acc:
        # Create the checkpoint directory on first use instead of failing
        # inside torch.save when it is missing.
        checkpoint_dir = 'models'
        if not os.path.isdir(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        torch.save(model.state_dict(),
                   os.path.join(checkpoint_dir, '{}.pth'.format(model_name)))
    return test_loss, test_acc
# Run name: model class, activation mode, estimator and the slope-annealing
# flag. It names both the checkpoint and the log file.
model_name = '{}-{}-{}-{}'.format(model.__class__.__name__, model.mode, model.estimator, args.slope_annealing)
# A parenthesised single-argument print works on Python 2 and Python 3.
print('Model : {}'.format(model_name.replace('-', ' - ')))

best_acc = 0.0

# Create the log directory up front so that open() cannot fail on a fresh
# checkout.
log_dir = 'logs'
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)

# The context manager closes the log even if training raises part-way through.
with open(os.path.join(log_dir, '{}.log'.format(model_name)), 'w') as log_file:
    log_file.write('Epoch,TrainLoss,TestLoss,TestAccuracy\n')
    for epoch in range(1, args.epochs + 1):
        train_loss = train(epoch)
        test_loss, test_acc = test(epoch, best_acc)
        best_acc = max(test_acc, best_acc)
        log_file.write('{},{},{},{}\n'.format(epoch, train_loss, test_loss, test_acc))
        # Flush after every epoch so the CSV is readable while training runs.
        log_file.flush()