# hp_search_benchmarks/darts_penntreebank_pytorch/adaptive.yaml

# Experiment name.
name: darts_rnn_nas

# Data settings. NOTE(review): presumably read by the trial code named in
# `entrypoint` -- confirm there.
data:
  # Same path as the container_path of the bind mount declared in this file.
  data_download_dir: /data

hyperparameters:
  # Fixed (non-searched) settings: every key in this first group is a plain
  # constant. NOTE(review): exact meanings and units are defined by the trial
  # code named in `entrypoint` -- confirm there before changing values.
  learning_rate: 20
  global_batch_size: 64
  # Epoch to start checking whether we should switch to
  # ASGD instead of SGD.
  optimizer_switch_epoch: 75
  eval_batch_size: 10
  emsize: 850
  nhid: 850
  nhidlast: 850
  bptt: 35
  dropout: 0.75
  dropouth: 0.25
  dropoutx: 0.75
  dropouti: 0.2
  dropoute: 0.1
  nonmono: 5
  alpha: 0
  beta: 1.0e-3
  weight_decay: 8.0e-7
  max_seq_length_delta: 20
  clip_gradients_l2_norm: 0.25

  # Tunable hyperparameters
  #
  # node<k>_edge: categorical over the k integers 0..k-1. node1_edge has a
  # single candidate, so it is effectively fixed at 0.
  # NOTE(review): presumably the index of the earlier state that feeds
  # node k in the searched cell -- confirm against the trial code.
  node1_edge:
    type: categorical
    vals: [0]
  node2_edge:
    type: categorical
    vals: [0, 1]
  node3_edge:
    type: categorical
    vals: [0, 1, 2]
  node4_edge:
    type: categorical
    vals: [0, 1, 2, 3]
  node5_edge:
    type: categorical
    vals: [0, 1, 2, 3, 4]
  node6_edge:
    type: categorical
    vals: [0, 1, 2, 3, 4, 5]
  node7_edge:
    type: categorical
    vals: [0, 1, 2, 3, 4, 5, 6]
  node8_edge:
    type: categorical
    vals: [0, 1, 2, 3, 4, 5, 6, 7]
  # node<k>_op: categorical over the same four names for every node.
  # NOTE(review): presumably the operation applied at node k -- confirm
  # against the trial code.
  node1_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node2_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node3_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node4_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node5_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node6_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node7_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]
  node8_op:
    type: categorical
    vals: [tanh, relu, sigmoid, identity]

resources:
  # Each trial requests one slot.
  # NOTE(review): presumably one accelerator/CPU device per trial -- confirm
  # against the cluster's slot definition.
  slots_per_trial: 1

# NOTE(review): presumably counted in batches, like the other lengths in this
# file -- confirm against the experiment-config reference.
scheduling_unit: 100

# Expose the host's /tmp inside the container at /data (writable). /data is
# also the value of data.data_download_dir in this file.
bind_mounts:
  - host_path: /tmp
    container_path: /data
    read_only: false

# NOTE(review): presumably forces a validation at least once every 500
# batches -- confirm against the experiment-config reference.
min_validation_period:
  batches: 500

# Hyperparameter search settings; the search space itself is the set of
# categorical entries under `hyperparameters`.
searcher:
  name: adaptive_asha
  # Perplexity is a measure of how well the learned distribution predicts the
  # data. Lower perplexity is better, hence smaller_is_better: true.
  metric: perplexity
  smaller_is_better: true
  # Length limit expressed in batches.
  # NOTE(review): presumably the maximum any single trial trains for --
  # confirm against the searcher reference.
  max_length:
    batches: 125000
  max_trials: 10000
  mode: aggressive
  max_concurrent_trials: 16

# Trial entry point, written as `<module>:<class>`.
# NOTE(review): presumably class DARTSRNNTrial in model_def.py next to this
# file -- confirm.
entrypoint: model_def:DARTSRNNTrial