flux.jl
# adapted from: https://github.com/FluxML/model-zoo
using Flux, Statistics
using Flux.Data: DataLoader
using Flux: onehotbatch, onecold, flatten
using Flux.Losses: crossentropy, logitcrossentropy, mse, kldivergence
using MLDatasets
using Base: @kwdef
using HyperTuning
import Random
using CUDA
# Download the data, and create train and test samples
function getdata(; batchsize = 256)
    xtrain, ytrain = MLDatasets.MNIST(:train)[:]
    xtest, ytest = MLDatasets.MNIST(:test)[:]

    imgsize = (28, 28, 1)
    nclasses = 10

    xtrain = flatten(xtrain)
    xtest = flatten(xtest)

    ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)

    train_loader = DataLoader((xtrain, ytrain), batchsize = batchsize, shuffle = true)
    test_loader = DataLoader((xtest, ytest), batchsize = batchsize)

    return train_loader, test_loader, imgsize, nclasses
end
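
# Illustrative check (commented out, not executed by the tuning run): with the
# default batchsize, the first training batch would have
#   size(x) == (28*28, 256)  and  size(y) == (10, 256)
# train_loader, _, _, _ = getdata()
# x, y = first(train_loader)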
# Compute the accuracy error to minimize
function eval_accuracy_error(loader, model, device)
    acc = 0
    ntot = 0
    for (x, y) in loader
        x, y = x |> device, y |> device
        ŷ = model(x)
        acc += sum(onecold(ŷ |> cpu) .== onecold(y |> cpu))
        ntot += size(x)[end]
    end
    return 1 - acc / ntot
end
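# For example, 9_600 correct predictions out of 10_000 test images give an
# accuracy error of 1 - 9600/10000 = 0.04.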
function objective(trial)
    # fix seed for the RNG
    seed = get_seed(trial)
    Random.seed!(seed)

    # activate CUDA if possible
    device = CUDA.functional() ? gpu : cpu

    # Create test and train dataloaders
    train_loader, test_loader, imgsize, nclasses = getdata()

    # get suggested hyperparameters
    @suggest activation in trial
    @suggest n_dense in trial
    @suggest dense in trial

    # Create the model with dense layers (fully connected)
    ann = []
    n_input = prod(imgsize)
    for n in dense[1:n_dense]
        push!(ann, Dense(n_input, n, activation))
        n_input = n
    end
    push!(ann, Dense(n_input, nclasses))
    model = Chain(ann) |> device
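    # For illustration (a hypothetical draw, not fixed by the scenario): with
    # n_dense = 2, dense = [64, 32, ...] and activation = relu, the chain built
    # above is Chain(Dense(784, 64, relu), Dense(64, 32, relu), Dense(32, 10)).
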
    # model parameters
    ps = Flux.params(model)

    # hyperparameters for the optimizer
    @suggest η in trial
    @suggest λ in trial

    # Instantiate the optimizer (weight decay only if λ > 0)
    opt = λ > 0 ? Flux.Optimiser(WeightDecay(λ), ADAM(η)) : ADAM(η)

    # get suggested loss
    @suggest loss in trial

    accuracy_error = 1.0
    epochs = 20 # maximum number of training epochs

    # Training
    for epoch in 1:epochs
        for (x, y) in train_loader
            # batch computation
            x, y = x |> device, y |> device
            gs = Flux.gradient(ps) do
                ŷ = model(x)
                loss(ŷ, y)
            end
            Flux.Optimise.update!(opt, ps, gs)
        end
        # Compute intermediate accuracy error
        accuracy_error = eval_accuracy_error(test_loader, model, device)
        # report value to pruner
        report_value!(trial, accuracy_error)
        # check if pruning is necessary
        should_prune(trial) && (return)
    end

    # if accuracy is over 90%, the trial is considered feasible
    accuracy_error < 0.1 && report_success!(trial)

    # return objective function value
    accuracy_error
end
# minimum and maximum number of dense layers
const MIN_DENSE = 2
const MAX_DENSE = 5
scenario = Scenario(### hyperparameters
                    # learning rate and weight-decay coefficient
                    η = (0.0..0.5),
                    λ = (0.0..0.5),
                    # activation functions
                    activation = [leakyrelu, relu],
                    # loss functions
                    loss = [mse, logitcrossentropy],
                    # number of dense layers
                    n_dense = MIN_DENSE:MAX_DENSE,
                    # number of neurons for each dense layer
                    dense = Bounds(fill(4, MAX_DENSE), fill(128, MAX_DENSE)),
                    ### Common settings
                    pruner = MedianPruner(start_after = 5#=trials=#, prune_after = 10#=epochs=#),
                    verbose = true, # show the log
                    max_trials = 30, # maximum number of hyperparameter configurations evaluated
                   )
display(scenario)
# minimize accuracy error
HyperTuning.optimize(objective, scenario)
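
# After optimization, the scenario object stores the evaluated trials.
# Assuming the installed HyperTuning version exports helpers such as
# `top_parameters` and `history` (check the package docs for your version),
# the best configuration and the trial history could be inspected with, e.g.:
# @info "Top parameters" top_parameters(scenario)
# display(history(scenario))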