Merge pull request #40 from alan-turing-institute/experiment2

Experiment2
alan-turing-institute · Oct 16, 2024 · 3a188ed
2 parents eb812ca + 06587f6
commit 3a188ed
Show file tree

Hide file tree

Showing 43 changed files with 580 additions and 12 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,9 +5,6 @@ wandb/
 # Editor
 .vscode/
 
-# For models
-temp*
-
 # For eval
 **.sh
 
@@ -21,6 +18,7 @@ __pycache__/
 
 # Temp
 temp*
+tmp*
 
 # Bash scripts
 *.sh

diff --git a/configs/data/gen_tofu_author_10.yaml b/configs/data/gen_tofu_author_10.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: author
   forget_fraction: 0.15
diff --git a/configs/data/gen_tofu_author_15.yaml b/configs/data/gen_tofu_author_15.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: author
   forget_fraction: 0.225
diff --git a/configs/data/gen_tofu_author_20.yaml b/configs/data/gen_tofu_author_20.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: author
   forget_fraction: 0.305
diff --git a/configs/data/gen_tofu_author_25.yaml b/configs/data/gen_tofu_author_25.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: author
   forget_fraction: 0.385
diff --git a/configs/data/gen_tofu_author_5.yaml b/configs/data/gen_tofu_author_5.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: author
   forget_fraction: 0.075
diff --git a/configs/data/gen_tofu_book_10.yaml b/configs/data/gen_tofu_book_10.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: book
   forget_fraction: 0.114
diff --git a/configs/data/gen_tofu_book_15.yaml b/configs/data/gen_tofu_book_15.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: book
   forget_fraction: 0.174
diff --git a/configs/data/gen_tofu_book_20.yaml b/configs/data/gen_tofu_book_20.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: book
   forget_fraction: 0.236
diff --git a/configs/data/gen_tofu_book_25.yaml b/configs/data/gen_tofu_book_25.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: book
   forget_fraction: 0.298
diff --git a/configs/data/gen_tofu_book_5.yaml b/configs/data/gen_tofu_book_5.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: book
   forget_fraction: 0.055
diff --git a/configs/data/gen_tofu_full.yaml b/configs/data/gen_tofu_full.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: question
   forget_fraction: 0.
diff --git a/configs/data/gen_tofu_publisher_10.yaml b/configs/data/gen_tofu_publisher_10.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: publisher
   forget_fraction: 0.4
diff --git a/configs/data/gen_tofu_publisher_15.yaml b/configs/data/gen_tofu_publisher_15.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: publisher
   forget_fraction: 0.595
diff --git a/configs/data/gen_tofu_publisher_20.yaml b/configs/data/gen_tofu_publisher_20.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: publisher
   forget_fraction: 0.795
diff --git a/configs/data/gen_tofu_publisher_25.yaml b/configs/data/gen_tofu_publisher_25.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: publisher
   forget_fraction: 0.995
diff --git a/configs/data/gen_tofu_publisher_5.yaml b/configs/data/gen_tofu_publisher_5.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: publisher
   forget_fraction: 0.2
diff --git a/configs/data/gen_tofu_question_10.yaml b/configs/data/gen_tofu_question_10.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: question
   forget_fraction: 0.1
diff --git a/configs/data/gen_tofu_question_15.yaml b/configs/data/gen_tofu_question_15.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: question
   forget_fraction: 0.15
diff --git a/configs/data/gen_tofu_question_20.yaml b/configs/data/gen_tofu_question_20.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: question
   forget_fraction: 0.2
diff --git a/configs/data/gen_tofu_question_25.yaml b/configs/data/gen_tofu_question_25.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: question
   forget_fraction: 0.25
diff --git a/configs/data/gen_tofu_question_5.yaml b/configs/data/gen_tofu_question_5.yaml
@@ -1,4 +1,5 @@
 dataset_name: gen_tofu
 data_kwargs:
+  type: granularity
   granularity: question
   forget_fraction: 0.05
diff --git a/configs/data/gen_tofu_rel_1.yaml b/configs/data/gen_tofu_rel_1.yaml
@@ -0,0 +1,4 @@
+dataset_name: gen_tofu
+data_kwargs:
+  type: relationship
+  forget_fraction: 0.005
diff --git a/configs/data/gen_tofu_rel_2.yaml b/configs/data/gen_tofu_rel_2.yaml
@@ -0,0 +1,4 @@
+dataset_name: gen_tofu
+data_kwargs:
+  type: relationship
+  forget_fraction: 0.01
diff --git a/configs/data/gen_tofu_rel_3.yaml b/configs/data/gen_tofu_rel_3.yaml
@@ -0,0 +1,4 @@
+dataset_name: gen_tofu
+data_kwargs:
+  type: relationship
+  forget_fraction: 0.02
diff --git a/configs/data/gen_tofu_rel_4.yaml b/configs/data/gen_tofu_rel_4.yaml
@@ -0,0 +1,4 @@
+dataset_name: gen_tofu
+data_kwargs:
+  type: relationship
+  forget_fraction: 0.03
diff --git a/configs/data/gen_tofu_rel_5.yaml b/configs/data/gen_tofu_rel_5.yaml
@@ -0,0 +1,4 @@
+dataset_name: gen_tofu
+data_kwargs:
+  type: relationship
+  forget_fraction: 0.04
diff --git a/configs/data/gen_tofu_rel_6.yaml b/configs/data/gen_tofu_rel_6.yaml
@@ -0,0 +1,4 @@
+dataset_name: gen_tofu
+data_kwargs:
+  type: relationship
+  forget_fraction: 0.05
diff --git a/configs/experiment/experiment_1_granularity_gpt2_remaining.yaml b/configs/experiment/experiment_1_granularity_gpt2_remaining.yaml
@@ -0,0 +1,60 @@
+# Combinations to build runs over
+combinations:
+  data_config:
+    - gen_tofu_question_5
+    - gen_tofu_question_10
+    - gen_tofu_question_15
+    - gen_tofu_question_20
+    - gen_tofu_question_25
+    - gen_tofu_book_10
+    - gen_tofu_book_15
+    - gen_tofu_book_20
+    - gen_tofu_book_25
+    - gen_tofu_author_10
+    - gen_tofu_author_15
+    - gen_tofu_author_20
+    - gen_tofu_author_25
+    - gen_tofu_publisher_10
+    - gen_tofu_publisher_15
+    - gen_tofu_publisher_20
+    - gen_tofu_publisher_25
+
+  train_config:
+    - longer
+
+  forget_config:
+    - [ascent, shorter]
+    - [difference, shorter]
+    - [idk, shorter]
+    - [kl, shorter]
+
+  seed:
+    - 40
+    - 41
+    - 42
+    - 43
+    - 44
+
+model_config: gpt2
+
+# Full data config: which dataset to use to build full model to do forgetting on
+full_data_config: gen_tofu_full
+
+# Baskerville kwargs
+use_bask: true
+model_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/models
+data_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/datasets
+wandb_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/wandb
+bask:
+  walltime: '0-5:0:0'
+  gpu_number: 1
+  node_number: 1
+
+# Wandb kwargs
+wandb_kwargs:
+  use_wandb: true
+  wandb_config:
+    entity: turing-arc
+    project: selective-forgetting
+    log_model: "false"
+    group: experiment-1-granularity
diff --git a/configs/experiment/experiment_1_granularity_llama_seeds.yaml b/configs/experiment/experiment_1_granularity_llama_seeds.yaml
@@ -0,0 +1,46 @@
+# Combinations to build runs over
+combinations:
+  data_config:
+    - gen_tofu_book_15
+    - gen_tofu_author_15
+    - gen_tofu_publisher_15
+
+  train_config:
+    - default
+
+  forget_config:
+    - [difference, default]
+    - [idk, default]
+    - [difference, shorter]
+    - [idk, shorter]
+
+  seed:
+    - 41
+    - 42
+    - 43
+    - 44
+
+model_config: Meta-Llama-3.1-8B-Instruct
+
+# Full data config: which dataset to use to build full model to do forgetting on
+full_data_config: gen_tofu_full
+
+# Baskerville kwargs
+use_bask: true
+model_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/models
+data_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/datasets
+wandb_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/wandb
+
+bask:
+  walltime: '0-12:0:0'
+  gpu_number: 1
+  node_number: 1
+
+# Wandb kwargs
+wandb_kwargs:
+  use_wandb: true
+  wandb_config:
+    entity: turing-arc
+    project: selective-forgetting
+    log_model: "false"
+    group: experiment-1-granularity
diff --git a/configs/experiment/experiment_1_granularity_phi_seeds.yaml b/configs/experiment/experiment_1_granularity_phi_seeds.yaml
@@ -0,0 +1,46 @@
+# Combinations to build runs over
+combinations:
+  data_config:
+    - gen_tofu_book_15
+    - gen_tofu_author_15
+    - gen_tofu_publisher_15
+
+  train_config:
+    - default
+
+  forget_config:
+    - [difference, default]
+    - [idk, default]
+    - [difference, shorter]
+    - [idk, shorter]
+
+  seed:
+    - 41
+    - 42
+    - 43
+    - 44
+
+model_config: Phi-3-mini-4k-instruct
+
+# Full data config: which dataset to use to build full model to do forgetting on
+full_data_config: gen_tofu_full
+
+# Baskerville kwargs
+use_bask: true
+model_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/models
+data_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/datasets
+wandb_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/wandb
+
+bask:
+  walltime: '0-12:0:0'
+  gpu_number: 1
+  node_number: 1
+
+# Wandb kwargs
+wandb_kwargs:
+  use_wandb: true
+  wandb_config:
+    entity: turing-arc
+    project: selective-forgetting
+    log_model: "false"
+    group: experiment-1-granularity
diff --git a/configs/experiment/experiment_2_relationships_gpt2.yaml b/configs/experiment/experiment_2_relationships_gpt2.yaml
@@ -0,0 +1,55 @@
+# Combinations to build runs over
+combinations:
+  data_config:
+    - gen_tofu_rel_1
+    - gen_tofu_rel_2
+    - gen_tofu_rel_3
+    - gen_tofu_rel_4
+    - gen_tofu_rel_5
+    - gen_tofu_rel_6
+
+  train_config:
+    - longer
+
+  forget_config:
+    - [ascent, longer]
+    - [difference, longer]
+    - [idk, longer]
+    - [kl, longer]
+
+  seed:
+    - 40
+    - 41
+    - 42
+    - 43
+    - 44
+    - 45
+    - 46
+    - 47
+    - 48
+    - 49
+
+model_config: gpt2
+
+# Full data config: which dataset to use to build full model to do forgetting on
+full_data_config: gen_tofu_full
+
+# Baskerville kwargs
+use_bask: true
+model_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/models
+data_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/datasets
+wandb_cache_dir: /bask/projects/v/vjgo8416-sltv-forget/caches/wandb
+
+bask:
+  walltime: '0-5:0:0'
+  gpu_number: 1
+  node_number: 1
+
+# Wandb kwargs
+wandb_kwargs:
+  use_wandb: true
+  wandb_config:
+    entity: turing-arc
+    project: selective-forgetting
+    log_model: "false"
+    group: experiment-2-relationship