# nlp/albert_squad_pytorch/const.yaml

# After 2 epochs, the model should reach 88.87 F1 / 85.76 EM.
# Experiment name; the `_1gpu` suffix lines up with `slots_per_trial: 1`
# under `resources`.
name: ALBert_SQuAD_PyTorch_1gpu
# Keys are grouped by apparent purpose (inferred from their names); the model
# definition referenced by `entrypoint` is the source of truth for how each
# value is actually used.
hyperparameters:
    # --- model / tokenization ---
    model_type: 'albert'
    do_lower_case: true

    # --- batching ---
    global_batch_size: 2

    # --- optimizer ---
    learning_rate: 5.0e-5
    adam_epsilon: 1.0e-8
    weight_decay: 0
    max_grad_norm: 1.0
    use_radam: false

    # --- schedule ---
    # 132198 = searcher max_length records / global_batch_size (264396 / 2),
    # i.e. the number of batches in 2 epochs.
    # NOTE(review): this was described as "the number of optimizer steps";
    # with `aggregation_frequency: 24` under `optimizations`, confirm whether
    # the model definition counts batches or aggregated optimizer updates.
    num_training_steps: 132198
    num_warmup_steps: 13220  # 10% of num_training_steps (13219.8, rounded)

    # --- input feature construction ---
    max_seq_length: 384
    doc_stride: 128
    max_query_length: 64

    # --- answer post-processing ---
    n_best_size: 20
    max_answer_length: 30
    null_score_diff_threshold: 0.0
# One slot per trial, in line with the `_1gpu` suffix of the experiment name.
resources:
    slots_per_trial: 1
# Single-trial search; a higher `f1` validation metric is better.
searcher:
    name: single
    metric: f1
    smaller_is_better: false
    # 264396 records = 2 x 132198, i.e. two passes over the training examples.
    max_length:
        records: 264396
# Validation period expressed in records
# (80000 records = 40000 batches at global_batch_size 2).
min_validation_period:
    records: 80000
# Pretrained checkpoint, dataset task, and where the data lives.
data:
    pretrained_model_name: 'albert-xxlarge-v2'
    # Canonical lowercase boolean, consistent with the other flags in this file.
    use_bind_mount: true
    # Same path as `container_path` of the `bind_mounts` entry in this file.
    bind_mount_path: /mnt/data
    task: 'SQuAD2.0'  # SQuAD 2.0 has 132198 examples.
# Trial to run, written as <module>:<class>.
entrypoint: model_def:AlbertSQuADPyTorch
optimizations:
    # 24 batches of global_batch_size 2 => 48 records per aggregation window.
    # NOTE(review): presumably gradients are accumulated over these batches
    # before each optimizer update - confirm against the platform docs and
    # against how `num_training_steps` is counted.
    aggregation_frequency: 24
# Mounts the host's /tmp/ at /mnt/data inside the container, writable.
# /mnt/data is the same path as `data.bind_mount_path`.
# NOTE(review): /tmp/ is usually not persistent across host reboots - confirm
# that is acceptable for whatever gets stored here.
bind_mounts:
    - host_path: /tmp/
      container_path: /mnt/data
      read_only: false