-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathmain.lua
290 lines (245 loc) · 9.4 KB
/
main.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
require 'nn'
require 'nngraph'
require 'hdf5'
require 'optim'
require 'trainer'
-- Command-line interface.
-- `cmd` and `opt` are deliberately kept global: the functions defined further
-- down in this file read `opt` directly.
-- Fix: `-gpuid` used to be registered twice in the "General Options" group;
-- it is now declared exactly once.
cmd = torch.CmdLine()
cmd:text()
cmd:text()
cmd:text('Convolutional neural net for health record classification')
cmd:text()
cmd:text('Options')
cmd:text()

cmd:text('Model Hyperparameters')
cmd:option('-kernels', '{1, 2, 3}', 'Kernel sizes of convolutions, table format.')
cmd:option('-num_feat_maps', 100, 'Number of feature maps after 1st convolution')
cmd:option('-dropoutProb', 0.5, 'Dropoff param')
cmd:option('-word_vec_size', 0, 'Dimensionality of word embeddings if not using pre-trained (0 o.w.)')
cmd:option('-vocab_size', 48848, 'size of vocab (if not using pre-trained')
cmd:option('-label_index', 1, 'index of the label to predict')
cmd:text()

cmd:text('Training Options')
cmd:option('-L2s', 3, 'L2 normalize weights')
cmd:option('-learning_rate', .5, 'Initial Learning Rate')
cmd:option('-optim_method', 'adadelta', 'Gradient descent method. Options: adadelta, adam')
cmd:option('-epochs', 20, 'Number of training epochs')
cmd:option('-param_init', 0.05, 'Initialize parameters at')
cmd:text()

cmd:text('General Options')
cmd:option('-data', 'data.h5', 'The h5 file containing the training data')
cmd:option('-seed', 1, 'random seed, set -1 for actual random')
cmd:option('-debug', 1, 'Print the confusion matrices')
cmd:option('-programming_mode', 0, 'only use one batch to test the code')
cmd:option('-gpuid', -1, 'which gpu to use. -1 = use CPU')
cmd:option('-save_cpu', 0, 'Save the checkpoint files as CPU readable models')
cmd:option('-savefile', '', 'Filename to autosave the checkpont to')
cmd:text()

cmd:text('Test Options')
cmd:option('-test_only', 0, 'Set to 1 if you only want to test a model')
cmd:option('-unlabeled', 0, 'Set to 1 if you want to predict unlabeled data')
cmd:option('-store_preds', 0, 'Set to 1 if you want to store the predictions')
cmd:option('-verbose', 0, 'Set to 1 if you want a verbose prediction')
cmd:option('-checkpoint', '', 'path to the .t7 file of a trained model')

-- Parse the script arguments into the global option table.
opt = cmd:parse(arg)
-- Seed torch's random number generator when a positive seed was requested;
-- any other value skips the call to torch.manualSeed.
do
  local seed = opt.seed
  if seed > 0 then
    print("Setting the seed to " .. seed)
    torch.manualSeed(seed)
  end
end
-- Map the -optim_method option onto the corresponding function of the optim
-- package. An unrecognised name used to leave `optim_method` nil, which only
-- surfaced later inside the training loop; it is now rejected up front.
local optim_methods = {
  adadelta = optim.adadelta,
  adam = optim.adam,
}
local optim_method = optim_methods[opt.optim_method]
if optim_method == nil then
  error(string.format(
    "unknown -optim_method '%s' (expected 'adadelta' or 'adam')",
    tostring(opt.optim_method)))
end
--define the data loader
-- `data` wraps one split of the HDF5 file. String keys resolve to class
-- members; any other key `i` yields the table `{ input[i], target[i] }`
-- (the target slot is nil when the split was loaded without labels).
local data = torch.class("data")

--- Load one split from an HDF5 file.
-- @param opt       option table; reads `unlabeled`, `label_index`,
--                  `programming_mode` and `gpuid`
-- @param data_file path of the HDF5 file to open
-- @param set       dataset name of the inputs; labels are read from
--                  `set .. '_label'` unless `opt.unlabeled >= 1`
function data:__init(opt, data_file, set)
  local f = hdf5.open(data_file, 'r')
  local input = f:read(set):all()
  local target
  --TODO:do this for all labels, change second number to corresponding concept.
  if opt.unlabeled < 1 then
    -- Select column `opt.label_index` along dimension 3 and add 1 to every
    -- label (presumably to turn 0/1 labels into 1-based class ids; confirm).
    target = f:read(set .. '_label'):all():narrow(3, opt.label_index, 1):squeeze():add(1) --put label index here.
  end
  -- Everything has been read into memory; release the file handle
  -- (the original never closed it).
  f:close()

  -- The two steps below used to be nested inside the "labeled" branch, so
  -- unlabeled inputs were never narrowed or moved to the GPU.
  if opt.programming_mode == 1 then
    -- keep only the first entry along dimension 1
    input = input:narrow(1, 1, 1)
    if target ~= nil then
      target = target:narrow(1, 1, 1)
    end
  end
  if opt.gpuid > -1 then
    input = input:cuda()
    if target ~= nil then
      target = target:cuda()
    end
  end

  self.input = input
  self.target = target
  self.length = input:size(1)
  self.batchsize = input:size(2)
  self.textsize = input:size(3)
end

--- Number of entries along the first dimension of the input tensor.
function data:size()
  return self.length
end

--- Index metamethod: class lookup for string keys, `{ input, target }` pairs
-- for every other key.
-- Whether a target is returned now depends on the instance actually holding
-- labels instead of on the global `opt`.
function data.__index(self, idx)
  if type(idx) == "string" then
    return data[idx]
  end
  local input = self.input[idx]
  local target
  local targets = rawget(self, 'target')
  if targets ~= nil then
    target = targets[idx]
  end
  return { input, target }
end
--- Find a layer of `model` by its `name` field or by its torch type name.
-- Every object handed to the callback by `model:apply` is inspected; when
-- several match, the one visited last is returned.
-- @param model object exposing an `apply(callback)` method
-- @param name  value compared with `layer.name` and `torch.typename(layer)`
-- @return the matching layer, or nil when nothing matched
function get_layer(model, name)
  local found
  -- The visitor is an anonymous closure: the original declared it with a bare
  -- `function get(...)`, which created a global named `get` on every call.
  model:apply(function(layer)
    if layer.name == name or torch.typename(layer) == name then
      found = layer
    end
  end)
  return found
end
--- Serialize the model together with its scores and options via torch.save.
-- The file name embeds the epoch and the dev score; it is prefixed with
-- `opt.savefile` when set, otherwise it is placed under `results/` with a
-- timestamp (NOTE(review): presumably `results/` must already exist — confirm).
-- @param fold_dev_scores  number stored as `dev_scores` and used in the file name
-- @param fold_test_scores value stored as `test_scores` (skipped when
--                         `opt.train_only` is set to something other than 0)
-- @param model            model object stored under `model`
-- @param opt              option table stored under `opt`
-- @param epoch            epoch number used in the file name
function save_progress(fold_dev_scores, fold_test_scores, model, opt, epoch)
  local savefile
  if opt.savefile ~= '' then
    savefile = string.format('%s_epoch_%d_dev_%.2f.t7', opt.savefile, epoch, fold_dev_scores)
  else
    savefile = string.format('results/%s_model_epoch_%d_dev_%.2f.t7', os.date('%Y%m%d_%H%M'), epoch, fold_dev_scores)
  end
  print('saving checkpoint to ', savefile)

  local save = {
    dev_scores = fold_dev_scores,
    opt = opt,
    model = model,
  }
  -- `train_only` is not a registered command-line option, so it is normally
  -- nil; the original `opt.train_only == 0` test was therefore never true and
  -- the test scores were silently dropped. A missing value now counts as 0.
  local train_only = opt.train_only
  if train_only == nil then
    train_only = 0
  end
  if train_only == 0 then
    save.test_scores = fold_test_scores
  end
  torch.save(savefile, save)
end
--- Build the convolutional classifier, or load it from `opt.checkpoint`.
-- When `opt.checkpoint` is empty a new nngraph module is assembled:
-- lookup table -> one convolution per entry of `opt.kernels` (ReLU + max) ->
-- concatenation -> dropout -> linear layer with 2 outputs -> LogSoftMax.
-- Otherwise the `model` field of the checkpoint file is used and the
-- arguments are ignored.
-- Reads the global `opt` (checkpoint, word_vec_size, param_init, kernels,
-- gpuid, num_feat_maps, dropoutProb).
-- @param vocab_size number of rows of the lookup table
-- @param emb_size   embedding dimensionality
-- @param max_sent   input length; only used for the reshapes on the GPU path
-- @param w2v        embedding matrix copied into the lookup table when
--                   `opt.word_vec_size == 0`
-- @return model, criterion (nn.ClassNLLCriterion), and a table with the
--         layers found for 'nn.Linear' (`linear`) and 'nn.LookupTable' (`w2v`)
function make_model(vocab_size, emb_size, max_sent, w2v)
  local model
  print(string.len(opt.checkpoint))
  if string.len(opt.checkpoint) == 0 then
    --initialize parts of the model
    local input = nn.Identity()()

    --1. Lookup Table
    local lookup = nn.LookupTable(vocab_size, emb_size)
    lookup.name = "embedding_layer"
    if opt.word_vec_size == 0 then
      lookup.weight:copy(w2v)
    else
      --if we use random weights, initialize them here.
      lookup.weight:uniform(-opt.param_init, opt.param_init)
    end
    --padding should be zero. If padding is not 2, change this!
    lookup.weight[2]:zero()
    -- from here on `lookup` is the graph node, not the module
    lookup = lookup(input)

    --2. Conv Kernels
    local kernels = opt.kernels
    local layer1 = {}
    for i = 1, #kernels do
      print("initializing kernel " .. i)
      local conv
      local conv_layer
      local max_time
      if opt.gpuid >= 0 then
        conv = cudnn.SpatialConvolution(1, opt.num_feat_maps, emb_size, kernels[i])
        conv_layer = nn.Reshape(opt.num_feat_maps,
          max_sent - kernels[i] + 1, true)(conv(nn.Reshape(1, max_sent, emb_size, true)(lookup)))
        max_time = nn.Max(3)(cudnn.ReLU()(conv_layer))
      else
        conv = nn.TemporalConvolution(emb_size, opt.num_feat_maps, kernels[i])
        conv_layer = conv(lookup)
        max_time = nn.Max(2)(nn.ReLU()(conv_layer)) -- max over time
      end
      conv.weight:uniform(-0.01, 0.01)
      conv.bias:zero()
      conv.name = 'convolution' .. kernels[i]
      table.insert(layer1, max_time)
    end

    --can put skip kernel here if needed.
    local conv_layer_concat
    if #layer1 > 1 then
      conv_layer_concat = nn.JoinTable(2)(layer1)
    else
      conv_layer_concat = layer1[1]
    end
    local last_layer = conv_layer_concat

    -- simple MLP layer leading to two outcomes (0,1)
    local linear = nn.Linear((#layer1) * opt.num_feat_maps, 2)
    linear.name = 'linearLayer'
    linear.weight:normal():mul(0.01)
    linear.bias:zero()

    -- The option is registered as `-dropoutProb`; the original read the
    -- non-existent `opt.dropout_p`, so nn.Dropout always received nil and the
    -- user-supplied value was ignored. `dropout_p` is still honoured first in
    -- case a caller sets it on the option table directly.
    local dropout_p = opt.dropout_p
    if dropout_p == nil then
      dropout_p = opt.dropoutProb
    end
    local output = nn.LogSoftMax()(linear(nn.Dropout(dropout_p)(last_layer)))
    model = nn.gModule({ input }, { output })
  else
    print("Load model from Checkpoint")
    model = torch.load(opt.checkpoint).model
  end

  local criterion = nn.ClassNLLCriterion()

  -- get layers
  local layers = {}
  layers['linear'] = get_layer(model, 'nn.Linear')
  layers['w2v'] = get_layer(model, 'nn.LookupTable')
  return model, criterion, layers
end
--- Script entry point: trains a model, or evaluates a checkpoint when
-- `opt.test_only >= 1`.
-- Training: loads the 'train' and 'val' splits, builds the model, then for
-- each epoch trains, evaluates on the validation split and saves a checkpoint.
-- Testing: loads the 'test' split and the checkpoint, runs one of the
-- trainer's test routines chosen by `opt.verbose`, then exits the process.
-- `model`, `criterion` and `layers` are left as globals, as in the original,
-- because code outside this file may rely on them (NOTE(review): confirm
-- against trainer.lua and make them local if nothing does).
function main()
  if opt.gpuid >= 0 then
    print('using CUDA on GPU ' .. opt.gpuid .. '...')
    require 'cutorch'
    require 'cudnn'
    require 'cunn'
    cutorch.setDevice(opt.gpuid + 1)
  end

  -- Retrieve kernels
  -- NOTE(security): the -kernels string is executed as Lua code; only pass
  -- trusted values. `assert` turns a syntax error in the string into a
  -- readable message instead of "attempt to call a nil value".
  assert(loadstring("opt.kernels = " .. opt.kernels))()

  if opt.test_only < 1 then
    -- Create the data loader classes.
    local train_data = data.new(opt, opt.data, 'train')
    local valid_data = data.new(opt, opt.data, 'val')

    local vocab_size = opt.vocab_size
    local emb_size = opt.word_vec_size
    local w2v
    if opt.word_vec_size == 0 then
      -- pre-trained embeddings: take the sizes from the stored matrix
      local f = hdf5.open(opt.data, 'r')
      w2v = f:read('w2v'):all()
      f:close() -- the original left this handle open
      vocab_size = w2v:size(1)
      emb_size = w2v:size(2)
    end

    -- Initialize Model and Criterion.
    model, criterion, layers = make_model(vocab_size, emb_size, train_data.textsize, w2v)
    if opt.gpuid >= 0 then
      model:cuda()
      criterion:cuda()
    end
    print("model build!")
    print("Training starts...")

    --get the parameters as tensors
    local params, grads = model:getParameters()
    local trainer = Trainer.new()

    -- The original passed an undeclared global `state` to the trainer, which
    -- is nil unless something else defines it. Use that global when present,
    -- otherwise one table that lives across all epochs
    -- (NOTE(review): presumably the trainer forwards it to the optim method
    -- as its persistent state — confirm in trainer.lua).
    local optim_state = state or {}

    for epoch = 1, opt.epochs do
      local train_err = trainer:train(train_data, model, criterion, optim_method, layers, optim_state, params, grads, opt)
      -- Fix: the figure reported as "val perf" and used in the checkpoint name
      -- was computed on the 'test' split while `valid_data` went unused.
      local dev_err = trainer:test(valid_data, model, criterion, 0, opt)
      print('epoch:', epoch, 'train perf:', 100 * train_err, '%, val perf ', 100 * dev_err, '%')
      save_progress(dev_err, train_err, model, opt, epoch)
      --break
    end
  else
    local test_data = data.new(opt, opt.data, 'test')
    assert(opt.checkpoint ~= '', 'you must specify a checkpoint to test with')
    -- with a checkpoint set, make_model loads the stored model and needs no arguments
    model, criterion, layers = make_model()
    local trainer = Trainer.new()
    print("Testing model")
    if opt.verbose < 1 then
      trainer:test(test_data, model, criterion, opt.store_preds, opt)
    elseif opt.verbose == 1 then
      print("Using verbose mode")
      trainer:test_verbose(test_data, model, criterion, opt.store_preds, opt)
    else
      print("Using verbose mode over whole data")
      trainer:test_verbose_all(test_data, model, criterion, opt.store_preds, opt)
    end
    os.exit()
  end
end

main()