# adaptive.yaml
# Configuration for the `darts_rnn_nas` experiment using the `adaptive_asha` searcher.
# Experiment name.
name: darts_rnn_nas

# Data settings; data_download_dir is the same path that bind_mounts uses as
# its container_path (/data).
data:
  data_download_dir: /data
# Hyperparameters. Plain scalar entries are fixed (non-searched) values;
# entries that carry `type`/`vals` are categorical search dimensions.
hyperparameters:
  # Fixed values.
  learning_rate: 20
  global_batch_size: 64
  # Epoch to start checking whether we should switch to
  # ASGD instead of SGD.
  optimizer_switch_epoch: 75
  eval_batch_size: 10
  emsize: 850
  nhid: 850
  nhidlast: 850
  bptt: 35
  dropout: 0.75
  dropouth: 0.25
  dropoutx: 0.75
  dropouti: 0.2
  dropoute: 0.1
  nonmono: 5
  alpha: 0
  # Keep the decimal point and signed exponent so these load as floats
  # rather than strings on YAML 1.1 parsers.
  beta: 1.0e-3
  weight_decay: 8.0e-7
  max_seq_length_delta: 20
  clip_gradients_l2_norm: 0.25

  # Tunable hyperparameters
  #
  # nodeN_edge: node N chooses among the N values 0..N-1
  # (presumably the index of the node feeding node N -- confirm in model_def).
  node1_edge:
    type: categorical
    vals: [0]
  node2_edge:
    type: categorical
    vals: [0, 1]
  node3_edge:
    type: categorical
    vals: [0, 1, 2]
  node4_edge:
    type: categorical
    vals: [0, 1, 2, 3]
  node5_edge:
    type: categorical
    vals: [0, 1, 2, 3, 4]
  node6_edge:
    type: categorical
    vals: [0, 1, 2, 3, 4, 5]
  node7_edge:
    type: categorical
    vals: [0, 1, 2, 3, 4, 5, 6]
  node8_edge:
    type: categorical
    vals: [0, 1, 2, 3, 4, 5, 6, 7]

  # nodeN_op: every node chooses from the same four candidate values.
  node1_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node2_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node3_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node4_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node5_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node6_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node7_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node8_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
# Each trial is given a single slot.
resources:
  slots_per_trial: 1

scheduling_unit: 100

# Mount the host's /tmp at /data inside the container, writable. /data is also
# the value of data.data_download_dir.
bind_mounts:
  - host_path: /tmp
    container_path: /data
    read_only: false

min_validation_period:
  batches: 500
# Hyperparameter search settings.
searcher:
  name: adaptive_asha
  # Perplexity: a measure of how well the learned distribution predicts the
  # data. Lower perplexity is better (hence smaller_is_better: true below).
  metric: perplexity
  max_length:
    batches: 125000
  max_trials: 10000
  mode: aggressive
  max_concurrent_trials: 16
  smaller_is_better: true

# Written as <module>:<class>.
entrypoint: model_def:DARTSRNNTrial