exp 2 large model configs

jack89roberts committed Aug 16, 2024
1 parent c5ed520 commit b118080

Showing 8 changed files with 165 additions and 17 deletions.
@@ -2,42 +2,37 @@
combinations:
data_config:
- gen_tofu_rel_1
- gen_tofu_rel_2
- gen_tofu_rel_3
- gen_tofu_rel_4
- gen_tofu_rel_5
- gen_tofu_rel_6

train_config:
- longer
- default

forget_config:
- [ascent, shorter]
- [difference, default]
- [idk, default]
- [difference, shorter]
- [idk, shorter]
- [kl, shorter]

seed:
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49

-model_config: gpt2
+model_config: Meta-Llama-3.1-8B-Instruct

# Full data config: which dataset to use to build full model to do forgetting on
full_data_config: gen_tofu_full

# Baskerville kwargs
use_bask: true
model_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/models
data_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/datasets
wandb_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/wandb

bask:
-walltime: '0-5:0:0'
+walltime: '0-12:0:0'
gpu_number: 1
node_number: 1

46 changes: 46 additions & 0 deletions configs/experiment/experiment_2_relationships_phi.yaml
@@ -0,0 +1,46 @@
# Combinations to build runs over
combinations:
data_config:
- gen_tofu_rel_1
- gen_tofu_rel_3
- gen_tofu_rel_6

train_config:
- default

forget_config:
- [difference, accumulate]
- [idk, default]
- [difference, shorter_accumulate]
- [idk, shorter]

seed:
- 40
- 41
- 42
- 43

model_config: Phi-3-mini-4k-instruct

# Full data config: which dataset to use to build full model to do forgetting on
full_data_config: gen_tofu_full

# Baskerville kwargs
use_bask: true
model_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/models
data_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/datasets
wandb_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/wandb

bask:
walltime: '0-12:0:0'
gpu_number: 1
node_number: 1

# Wandb kwargs
wandb_kwargs:
use_wandb: true
wandb_config:
entity: turing-arc
project: selective-forgetting
log_model: "false"
group: experiment-2-relationship
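
For orientation, the combinations block in these experiment configs defines a Cartesian product over data_config, train_config, forget_config and seed. A minimal sketch of how a launcher could expand it into individual runs; the expansion code below is illustrative, not this repository's actual launcher:

import itertools
import yaml

with open("configs/experiment/experiment_2_relationships_phi.yaml") as f:
    cfg = yaml.safe_load(f)

combos = cfg["combinations"]
runs = [
    {"data_config": d, "train_config": t, "forget_config": fc, "seed": s}
    for d, t, fc, s in itertools.product(
        combos["data_config"],
        combos["train_config"],
        combos["forget_config"],
        combos["seed"],
    )
]
print(len(runs))  # 3 datasets x 1 train config x 4 forget configs x 4 seeds = 48 runs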
@@ -4,7 +4,7 @@ trainer_kwargs: # passed to TrainingArguments

# Batch size
per_device_train_batch_size: 8
-per_device_eval_batch_size: 8
+per_device_eval_batch_size: 16
gradient_accumulation_steps: 1

# Core hyperparameters
@@ -0,0 +1,35 @@
trainer_kwargs: # passed to TrainingArguments
# Memory optimization
bf16: True

# Batch size
per_device_train_batch_size: 8
per_device_eval_batch_size: 16
gradient_accumulation_steps: 1

# Core hyperparameters
learning_rate: 1.e-4
num_train_epochs: 5
weight_decay: 0.01
warmup_ratio: 0.2 # 0.2 for 1 epoch (when running for 5 epochs)

# Evaluation
eval_strategy: steps
eval_steps: 0.2

# Logging
logging_strategy: steps
logging_steps: 0.2


# Early stopping
save_strategy: epoch
save_total_limit: 1

# Outputs
output_dir: output

peft_kwargs: # passed to LoraConfig
r: 8
lora_alpha: 32
lora_dropout: 0.05
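
The comments in these hyperparameter files indicate where each section ends up: trainer_kwargs is forwarded to Hugging Face TrainingArguments and peft_kwargs to LoraConfig. A minimal sketch of that wiring, assuming a plain yaml.safe_load; the file path is a placeholder and the repository's training script may differ:

import yaml
from peft import LoraConfig
from transformers import TrainingArguments

with open("hyperparameters.yaml") as f:  # placeholder path for one of the files above
    hp = yaml.safe_load(f)

# Each YAML section unpacks directly into the corresponding Hugging Face config object.
training_args = TrainingArguments(**hp["trainer_kwargs"])  # bf16, batch sizes, LR schedule, eval/log/save cadence
lora_config = LoraConfig(**hp["peft_kwargs"])  # r, lora_alpha, lora_dropout (and target_modules where given)
print(training_args.learning_rate, lora_config.r)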
@@ -4,7 +4,7 @@ trainer_kwargs: # passed to TrainingArguments

# Batch size
per_device_train_batch_size: 8
-per_device_eval_batch_size: 16
+per_device_eval_batch_size: 32
gradient_accumulation_steps: 2

# Core hyperparameters
@@ -4,7 +4,7 @@ trainer_kwargs: # passed to TrainingArguments

# Batch size
per_device_train_batch_size: 16
-per_device_eval_batch_size: 16
+per_device_eval_batch_size: 32
gradient_accumulation_steps: 1

# Core hyperparameters
36 changes: 36 additions & 0 deletions configs/model/Phi-3-mini-4k-instruct/hyperparameters/shorter.yaml
@@ -0,0 +1,36 @@
trainer_kwargs: # passed to TrainingArguments
# Memory optimization
bf16: True

# Batch size
per_device_train_batch_size: 16
per_device_eval_batch_size: 32
gradient_accumulation_steps: 1

# Core hyperparameters
learning_rate: 1.e-4
num_train_epochs: 5
weight_decay: 0.01
warmup_ratio: 0.2 # 0.2 for 1 epoch (when running for 5 epochs)

# Evaluation
eval_strategy: steps
eval_steps: 0.2

# Logging
logging_strategy: steps
logging_steps: 0.2


# Early stopping
save_strategy: epoch
save_total_limit: 1

# Outputs
output_dir: output

peft_kwargs: # passed to LoraConfig
r: 8
lora_alpha: 8
lora_dropout: 0.05
target_modules: ["k_proj", "q_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj"]
@@ -0,0 +1,36 @@
trainer_kwargs: # passed to TrainingArguments
# Memory optimization
bf16: True

# Batch size
per_device_train_batch_size: 8
per_device_eval_batch_size: 32
gradient_accumulation_steps: 2

# Core hyperparameters
learning_rate: 1.e-4
num_train_epochs: 5
weight_decay: 0.01
warmup_ratio: 0.2 # 0.2 for 1 epoch (when running for 5 epochs)

# Evaluation
eval_strategy: steps
eval_steps: 0.2

# Logging
logging_strategy: steps
logging_steps: 0.2


# Early stopping
save_strategy: epoch
save_total_limit: 1

# Outputs
output_dir: output

peft_kwargs: # passed to LoraConfig
r: 8
lora_alpha: 8
lora_dropout: 0.05
target_modules: ["k_proj", "q_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj"]
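
These new configs also pin target_modules, attaching LoRA adapters to every attention and MLP projection by name rather than relying on PEFT's defaults. A minimal sketch of applying peft_kwargs like the ones above to a base model; the model id and task_type are assumptions, and the repository's training code may wire this differently:

from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

# The base model id is an assumption for illustration; the module names below match
# Llama-style attention (q/k/v/o_proj) and MLP (gate/up/down_proj) projections.
model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-8B-Instruct")
lora_config = LoraConfig(
    task_type="CAUSAL_LM",  # not set in the YAML above; added so PEFT wraps a causal-LM model
    r=8,
    lora_alpha=8,
    lora_dropout=0.05,
    target_modules=["k_proj", "q_proj", "v_proj", "o_proj",
                    "gate_proj", "down_proj", "up_proj"],
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the LoRA matrices are trainable; base weights stay frozen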
