Commit: update KD config names

Felipe Mello committed Dec 5, 2024
1 parent 40419fd · commit a1d0eda
Showing 7 changed files with 24 additions and 24 deletions.
4 changes: 2 additions & 2 deletions recipes/configs/llama3_2/8B_to_1B_KD_lora_distributed.yaml
@@ -10,12 +10,12 @@
 # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama3_1/8B_lora
 #
 # To launch on 2 devices, run the following command from root:
-# tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config llama3_2/8B_to_1B_KD_distributed
+# tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config llama3_2/8B_to_1B_KD_lora_distributed
 #
 # This config works best for distilling on 2+ devices.


-output_dir: /tmp/torchtune/llama3_2_8B_to_1B/KD_distributed # /tmp may be deleted by your system. Change it to your preference.
+output_dir: /tmp/torchtune/llama3_2_8B_to_1B/KD_lora_distributed # /tmp may be deleted by your system. Change it to your preference.

 # Model Arguments
 model:
4 changes: 2 additions & 2 deletions recipes/configs/llama3_2/8B_to_1B_KD_lora_single_device.yaml
@@ -10,12 +10,12 @@
 # tune run lora_finetune_single_device --config llama3_1/8B_lora_single_device
 #
 # To launch on a single device, run the following command from root:
-# tune run knowledge_distillation_single_device --config llama3_2/8B_to_1B_KD_single_device
+# tune run knowledge_distillation_single_device --config llama3_2/8B_to_1B_KD_lora_single_device
 #
 # This config works only for training on single device.


-output_dir: /tmp/torchtune/llama3_2_8B_to_1B/KD_single_device # /tmp may be deleted by your system. Change it to your preference.
+output_dir: /tmp/torchtune/llama3_2_8B_to_1B/KD_lora_single_device # /tmp may be deleted by your system. Change it to your preference.

 # Model Arguments
 model:
4 changes: 2 additions & 2 deletions recipes/configs/qwen2/1.5_to_0.5B_KD_lora_distributed.yaml
@@ -10,12 +10,12 @@
 # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2/1.5B_lora
 #
 # To launch on 2 devices, run the following command from root:
-# tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config qwen2/1.5_to_0.5B_KD_distributed
+# tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config qwen2/1.5_to_0.5B_KD_lora_distributed
 #
 # This config works best for distilling on 2+ devices.


-output_dir: /tmp/torchtune/qwen2_1_5_to_0_5B/KD_distributed # /tmp may be deleted by your system. Change it to your preference.
+output_dir: /tmp/torchtune/qwen2_1_5_to_0_5B/KD_lora_distributed # /tmp may be deleted by your system. Change it to your preference.

 # Model Arguments
 model:
4 changes: 2 additions & 2 deletions recipes/configs/qwen2/1.5_to_0.5B_KD_lora_single_device.yaml
@@ -10,12 +10,12 @@
 # tune run lora_finetune_single_device --config qwen2/1.5B_lora_single_device
 #
 # To launch on a single device, run the following command from root:
-# tune run knowledge_distillation_single_device --config qwen2/1.5_to_0.5B_KD_single_device
+# tune run knowledge_distillation_single_device --config qwen2/1.5_to_0.5B_KD_lora_single_device
 #
 # This config works only for distilling on a single device.


-output_dir: /tmp/torchtune/qwen2_1_5_to_0_5B/KD_single_device # /tmp may be deleted by your system. Change it to your preference.
+output_dir: /tmp/torchtune/qwen2_1_5_to_0_5B/KD_lora_single_device # /tmp may be deleted by your system. Change it to your preference.

 # Model Arguments
 model:
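For quick reference, the four renamed configs are launched the same way as before; only the name passed to --config gains a lora infix. A minimal sketch of the updated invocations, collected from the config headers above (it assumes torchtune is installed and the tune CLI is on your PATH):

# Llama 3.2 8B -> 1B KD with LoRA
tune run knowledge_distillation_single_device --config llama3_2/8B_to_1B_KD_lora_single_device
tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config llama3_2/8B_to_1B_KD_lora_distributed

# Qwen2 1.5B -> 0.5B KD with LoRA
tune run knowledge_distillation_single_device --config qwen2/1.5_to_0.5B_KD_lora_single_device
tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config qwen2/1.5_to_0.5B_KD_lora_distributed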
8 changes: 4 additions & 4 deletions tests/recipes/test_knowledge_distillation_distributed.py
@@ -62,7 +62,7 @@ def test_loss(self, tmpdir, monkeypatch):

        cmd = f"""
        tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed \
-            --config llama3_2/8B_to_1B_KD_distributed \
+            --config llama3_2/8B_to_1B_KD_lora_distributed \
            output_dir={tmpdir} \
            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
@@ -120,7 +120,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
        # Train for two epochs
        cmd_1 = f"""
        tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed \
-            --config llama3_2/8B_to_1B_KD_distributed \
+            --config llama3_2/8B_to_1B_KD_lora_distributed \
            output_dir={tmpdir} \
            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
@@ -148,7 +148,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
        # Resume training
        cmd_2 = f"""
        tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed \
-            --config llama3_2/8B_to_1B_KD_distributed \
+            --config llama3_2/8B_to_1B_KD_lora_distributed \
            output_dir={tmpdir} \
            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir={tmpdir} \
@@ -199,7 +199,7 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):

        cmd = f"""
        tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed \
-            --config llama3_2/8B_to_1B_KD_distributed \
+            --config llama3_2/8B_to_1B_KD_lora_distributed \
            output_dir={tmpdir} \
            checkpointer._component_={ckpt_component} \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
8 changes: 4 additions & 4 deletions tests/recipes/test_knowledge_distillation_single_device.py
@@ -65,7 +65,7 @@ def test_loss(
        tmpdir,
        monkeypatch,
    ):
-        config = "qwen2/1.5_to_0.5B_KD_single_device"
+        config = "qwen2/1.5_to_0.5B_KD_lora_single_device"
        model_type = "llama3"
        ckpt_type = "tune"
        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
@@ -152,7 +152,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
        # Train for two epochs
        cmd_1 = f"""
        tune run knowledge_distillation_single_device \
-            --config qwen2/1.5_to_0.5B_KD_single_device \
+            --config qwen2/1.5_to_0.5B_KD_lora_single_device \
            output_dir={tmpdir} \
            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
@@ -186,7 +186,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
        # Resume training
        cmd_2 = f"""
        tune run knowledge_distillation_single_device \
-            --config qwen2/1.5_to_0.5B_KD_single_device \
+            --config qwen2/1.5_to_0.5B_KD_lora_single_device \
            output_dir={tmpdir} \
            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir={tmpdir} \
@@ -242,7 +242,7 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):

        cmd = f"""
        tune run knowledge_distillation_single_device \
-            --config qwen2/1.5_to_0.5B_KD_single_device \
+            --config qwen2/1.5_to_0.5B_KD_lora_single_device \
            output_dir={tmpdir} \
            checkpointer._component_={ckpt_component} \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
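To exercise the updated tests locally, something along these lines should work; this is a sketch, not part of the commit. It assumes a source checkout with dev dependencies installed, and note that these recipe tests expect test checkpoints to be available and may be gated behind the repo's integration-test marker or flag, so they can be skipped in a plain run:

pytest tests/recipes/test_knowledge_distillation_single_device.py -k test_loss
pytest tests/recipes/test_knowledge_distillation_distributed.py -k test_loss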
16 changes: 8 additions & 8 deletions torchtune/_recipe_registry.py
@@ -480,12 +480,12 @@ class Recipe:
        file_path="knowledge_distillation_single_device.py",
        configs=[
            Config(
-                name="qwen2/1.5_to_0.5B_KD_single_device",
-                file_path="qwen2/1.5_to_0.5B_KD_single_device.yaml",
+                name="qwen2/1.5_to_0.5B_KD_lora_single_device",
+                file_path="qwen2/1.5_to_0.5B_KD_lora_single_device.yaml",
            ),
            Config(
-                name="llama3_2/8B_to_1B_KD_single_device",
-                file_path="llama3_2/8B_to_1B_KD_single_device.yaml",
+                name="llama3_2/8B_to_1B_KD_lora_single_device",
+                file_path="llama3_2/8B_to_1B_KD_lora_single_device.yaml",
            ),
        ],
        supports_distributed=False,
@@ -495,12 +495,12 @@
        file_path="knowledge_distillation_distributed.py",
        configs=[
            Config(
-                name="qwen2/1.5_to_0.5B_KD_distributed",
-                file_path="qwen2/1.5_to_0.5B_KD_distributed.yaml",
+                name="qwen2/1.5_to_0.5B_KD_lora_distributed",
+                file_path="qwen2/1.5_to_0.5B_KD_lora_distributed.yaml",
            ),
            Config(
-                name="llama3_2/8B_to_1B_KD_distributed",
-                file_path="llama3_2/8B_to_1B_KD_distributed.yaml",
+                name="llama3_2/8B_to_1B_KD_lora_distributed",
+                file_path="llama3_2/8B_to_1B_KD_lora_distributed.yaml",
            ),
        ],
        supports_distributed=True,
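Because the registry entries changed, the new names are what the tune CLI should now report and resolve. A minimal sanity check, sketched under the assumption that tune ls and tune cp behave as in current torchtune releases (the destination filename below is just an example):

tune ls | grep KD_lora
tune cp llama3_2/8B_to_1B_KD_lora_single_device ./my_8B_to_1B_KD_lora.yaml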
