hydra fairseq - add yaml files
Summary: hydra fairseq - add yaml files

Reviewed By: alexeib

Differential Revision: D22403786

fbshipit-source-id: 81fb5902c1fbcf7b03d111037327ab0f8bfb57f2
Mu Tian authored and facebook-github-bot committed Sep 1, 2020
1 parent fe1b1bb commit 251c869
Showing 21 changed files with 610 additions and 0 deletions.
7 changes: 7 additions & 0 deletions config/config.yaml
@@ -0,0 +1,7 @@
defaults:
- params: training_params
- task: language_modeling
- model: transformer_lm
- criterion: cross_entropy
- optimizer: adam
- lr_scheduler: inverse_sqrt
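
The `defaults` list above drives Hydra's config composition: each entry picks one file from a config group directory (e.g. `model: transformer_lm` pulls in `config/model/transformer_lm.yaml`), and the `# @package _group_` header in those files places their contents under the group's key in the final tree. As a rough illustration of how such a tree is consumed — a minimal sketch assuming Hydra 1.0+, not fairseq's actual entry point (the `train.py` name is hypothetical):

```python
# train.py -- hypothetical entry point; fairseq's real training script differs.
import hydra
from omegaconf import DictConfig, OmegaConf


@hydra.main(config_path="config", config_name="config")
def main(cfg: DictConfig) -> None:
    # cfg now holds the composed tree: cfg.params, cfg.task, cfg.model,
    # cfg.criterion, cfg.optimizer, cfg.lr_scheduler.
    print(OmegaConf.to_yaml(cfg))


if __name__ == "__main__":
    main()
```

Any group can then be swapped or overridden from the command line in the usual Hydra way, e.g. `model=transformer_lm_gpt` or `lr_scheduler=cosine lr_scheduler.warmup_updates=4000`.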
7 changes: 7 additions & 0 deletions config/config_eval_lm.yaml
@@ -0,0 +1,7 @@
defaults:
- params: eval_lm_params
- task: language_modeling
- model: transformer_lm
- criterion: cross_entropy
- optimizer: adam
- lr_scheduler: inverse_sqrt
3 changes: 3 additions & 0 deletions config/criterion/adaptive_loss.yaml
@@ -0,0 +1,3 @@
# @package _group_
sentence_avg: ${params.optimization.sentence_avg}
ddp_backend: ${params.distributed_training.ddp_backend}
3 changes: 3 additions & 0 deletions config/criterion/cross_entropy.yaml
@@ -0,0 +1,3 @@
# @package _group_
sentence_avg: ${params.optimization.sentence_avg}
ddp_backend: ${params.distributed_training.ddp_backend}
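
The `${params...}` values in the criterion configs are OmegaConf interpolations: the criterion node does not duplicate those settings, it resolves them from the composed `params` group when the value is read. A minimal, self-contained sketch of that resolution (the keys here are trimmed-down stand-ins, not the full fairseq schema):

```python
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "params": {"optimization": {"sentence_avg": True}},
        "criterion": {"sentence_avg": "${params.optimization.sentence_avg}"},
    }
)

# The interpolation is resolved lazily, at access time.
assert cfg.criterion.sentence_avg is True

# Changing the source value is reflected wherever it is interpolated.
cfg.params.optimization.sentence_avg = False
assert cfg.criterion.sentence_avg is False
```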
7 changes: 7 additions & 0 deletions config/lr_scheduler/cosine.yaml
@@ -0,0 +1,7 @@
# @package _group_
warmup_updates: 0
warmup_init_lr: -1
max_lr: 1.0
t_mult: 1.0
lr_period_updates: -1
lr_shrink: 0.1
3 changes: 3 additions & 0 deletions config/lr_scheduler/inverse_sqrt.yaml
@@ -0,0 +1,3 @@
# @package _group_
warmup_updates: 4000
warmup_init_lr: -1
36 changes: 36 additions & 0 deletions config/model/transformer_lm.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "relu"
dropout: 0.1
attention_dropout: 0.0
activation_dropout: 0.0
relu_dropout: 0.0
decoder_embed_dim: 512
decoder_output_dim: 512
decoder_input_dim: 512
decoder_ffn_embed_dim: 2048
decoder_layers: 6
decoder_attention_heads: 8
decoder_normalize_before: true
no_decoder_final_norm: false
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
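
The files under `config/model/` are alternative choices for the `model` group; selecting one, or overriding a single field in it, can also be done programmatically. A sketch using Hydra's compose API — assuming Hydra 1.1+, where `initialize`/`compose` are top-level exports (earlier releases keep them under `hydra.experimental`):

```python
# Sketch: compose the config tree in code and swap the model group.
from hydra import compose, initialize
from omegaconf import OmegaConf

with initialize(config_path="config"):
    cfg = compose(
        config_name="config",
        overrides=[
            "model=transformer_lm_gpt",  # pick another file from config/model/
            "model.dropout=0.2",         # override a single field in that group
        ],
    )

print(OmegaConf.to_yaml(cfg.model))
```

The remaining `transformer_lm_*.yaml` files below share this schema and differ only in hyperparameter values (embedding width, FFN size, layer count, attention heads, dropout, and the adaptive input/softmax settings).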
36 changes: 36 additions & 0 deletions config/model/transformer_lm_baevski_gbw.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "relu"
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0
decoder_embed_dim: 512
decoder_output_dim: 512
decoder_input_dim: 512
decoder_ffn_embed_dim: 4096
decoder_layers: 12
decoder_attention_heads: 16
decoder_normalize_before: true
no_decoder_final_norm: true
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
36 changes: 36 additions & 0 deletions config/model/transformer_lm_baevski_wiki103.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "relu"
dropout: 0.3
attention_dropout: 0.1
activation_dropout: 0.1
relu_dropout: 0.1
decoder_embed_dim: 1024
decoder_output_dim: 1024
decoder_input_dim: 1024
decoder_ffn_embed_dim: 4096
decoder_layers: 16
decoder_attention_heads: 8
decoder_normalize_before: true
no_decoder_final_norm: true
adaptive_softmax_cutoff: "20000,60000"
adaptive_softmax_dropout: 0.2
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: true
adaptive_input_factor: 4
adaptive_input_cutoff: "20000,60000"
tie_adaptive_weights: true
tie_adaptive_proj: true
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
36 changes: 36 additions & 0 deletions config/model/transformer_lm_big.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "relu"
dropout: 0.1
attention_dropout: 0.0
activation_dropout: 0.0
relu_dropout: 0.0
decoder_embed_dim: 1024
decoder_output_dim: 1024
decoder_input_dim: 1024
decoder_ffn_embed_dim: 4096
decoder_layers: 12
decoder_attention_heads: 16
decoder_normalize_before: true
no_decoder_final_norm: false
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
36 changes: 36 additions & 0 deletions config/model/transformer_lm_gbw.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "relu"
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0
decoder_embed_dim: 512
decoder_output_dim: 512
decoder_input_dim: 512
decoder_ffn_embed_dim: 4096
decoder_layers: 12
decoder_attention_heads: 16
decoder_normalize_before: true
no_decoder_final_norm: true
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
36 changes: 36 additions & 0 deletions config/model/transformer_lm_gpt.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "gelu"
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0
decoder_embed_dim: 768
decoder_output_dim: 768
decoder_input_dim: 768
decoder_ffn_embed_dim: 3072
decoder_layers: 12
decoder_attention_heads: 12
decoder_normalize_before: true
no_decoder_final_norm: false
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
36 changes: 36 additions & 0 deletions config/model/transformer_lm_gpt2_big.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "gelu"
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0
decoder_embed_dim: 1600
decoder_output_dim: 1600
decoder_input_dim: 1600
decoder_ffn_embed_dim: 6400
decoder_layers: 48
decoder_attention_heads: 25
decoder_normalize_before: true
no_decoder_final_norm: false
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
36 changes: 36 additions & 0 deletions config/model/transformer_lm_gpt2_medium.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "gelu"
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0
decoder_embed_dim: 1280
decoder_output_dim: 1280
decoder_input_dim: 1280
decoder_ffn_embed_dim: 5120
decoder_layers: 36
decoder_attention_heads: 20
decoder_normalize_before: true
no_decoder_final_norm: false
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0
36 changes: 36 additions & 0 deletions config/model/transformer_lm_gpt2_small.yaml
@@ -0,0 +1,36 @@
# @package _group_
activation_fn: "gelu"
dropout: 0.1
attention_dropout: 0.1
activation_dropout: 0.0
relu_dropout: 0.0
decoder_embed_dim: 1024
decoder_output_dim: 1024
decoder_input_dim: 1024
decoder_ffn_embed_dim: 4096
decoder_layers: 24
decoder_attention_heads: 16
decoder_normalize_before: true
no_decoder_final_norm: false
adaptive_softmax_cutoff: null
adaptive_softmax_dropout: 0
adaptive_softmax_factor: 4
no_token_positional_embeddings: false
share_decoder_input_output_embed: false
character_embeddings: false
character_filters: "[(1, 64), (2, 128), (3, 192), (4, 256), (5, 256), (6, 256), (7, 256)]"
character_embedding_dim: 4
char_embedder_highway_layers: 2
adaptive_input: false
adaptive_input_factor: 4
adaptive_input_cutoff: null
tie_adaptive_weights: false
tie_adaptive_proj: false
decoder_learned_pos: false
decoder_layerdrop: 0
decoder_layers_to_keep: null
layernorm_embedding: false
no_scale_embedding: false
quant_noise_pq: 0
quant_noise_pq_block_size: 8
quant_noise_scalar: 0