Commit: update KD config names

Felipe Mello committed Dec 5, 2024
1 parent 40419fd · commit a1d0eda
Showing 7 changed files with 24 additions and 24 deletions.
4 changes: 2 additions & 2 deletions recipes/configs/llama3_2/8B_to_1B_KD_lora_distributed.yaml
@@ -10,12 +10,12 @@
 # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config llama3_1/8B_lora
 #
 # To launch on 2 devices, run the following command from root:
-# tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config llama3_2/8B_to_1B_KD_distributed
+# tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config llama3_2/8B_to_1B_KD_lora_distributed
 #
 # This config works best for distilling on 2+ devices.


-output_dir: /tmp/torchtune/llama3_2_8B_to_1B/KD_distributed # /tmp may be deleted by your system. Change it to your preference.
+output_dir: /tmp/torchtune/llama3_2_8B_to_1B/KD_lora_distributed # /tmp may be deleted by your system. Change it to your preference.

 # Model Arguments
 model:
4 changes: 2 additions & 2 deletions recipes/configs/llama3_2/8B_to_1B_KD_lora_single_device.yaml
@@ -10,12 +10,12 @@
 # tune run lora_finetune_single_device --config llama3_1/8B_lora_single_device
 #
 # To launch on a single device, run the following command from root:
-# tune run knowledge_distillation_single_device --config llama3_2/8B_to_1B_KD_single_device
+# tune run knowledge_distillation_single_device --config llama3_2/8B_to_1B_KD_lora_single_device
 #
 # This config works only for training on single device.


-output_dir: /tmp/torchtune/llama3_2_8B_to_1B/KD_single_device # /tmp may be deleted by your system. Change it to your preference.
+output_dir: /tmp/torchtune/llama3_2_8B_to_1B/KD_lora_single_device # /tmp may be deleted by your system. Change it to your preference.

 # Model Arguments
 model:
4 changes: 2 additions & 2 deletions recipes/configs/qwen2/1.5_to_0.5B_KD_lora_distributed.yaml
@@ -10,12 +10,12 @@
 # tune run --nnodes 1 --nproc_per_node 2 lora_finetune_distributed --config qwen2/1.5B_lora
 #
 # To launch on 2 devices, run the following command from root:
-# tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config qwen2/1.5_to_0.5B_KD_distributed
+# tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config qwen2/1.5_to_0.5B_KD_lora_distributed
 #
 # This config works best for distilling on 2+ devices.


-output_dir: /tmp/torchtune/qwen2_1_5_to_0_5B/KD_distributed # /tmp may be deleted by your system. Change it to your preference.
+output_dir: /tmp/torchtune/qwen2_1_5_to_0_5B/KD_lora_distributed # /tmp may be deleted by your system. Change it to your preference.

 # Model Arguments
 model:
4 changes: 2 additions & 2 deletions recipes/configs/qwen2/1.5_to_0.5B_KD_lora_single_device.yaml
@@ -10,12 +10,12 @@
 # tune run lora_finetune_single_device --config qwen2/1.5B_lora_single_device
 #
 # To launch on a single device, run the following command from root:
-# tune run knowledge_distillation_single_device --config qwen2/1.5_to_0.5B_KD_single_device
+# tune run knowledge_distillation_single_device --config qwen2/1.5_to_0.5B_KD_lora_single_device
 #
 # This config works only for distilling on a single device.


-output_dir: /tmp/torchtune/qwen2_1_5_to_0_5B/KD_single_device # /tmp may be deleted by your system. Change it to your preference.
+output_dir: /tmp/torchtune/qwen2_1_5_to_0_5B/KD_lora_single_device # /tmp may be deleted by your system. Change it to your preference.

 # Model Arguments
 model:
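For quick reference, the four renamed configs are launched the same way as before; only the name passed to --config gains a lora infix. A minimal sketch of the updated invocations, collected from the config headers above (it assumes torchtune is installed and the tune CLI is on your PATH):

# Llama 3.2 8B -> 1B KD with LoRA
tune run knowledge_distillation_single_device --config llama3_2/8B_to_1B_KD_lora_single_device
tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config llama3_2/8B_to_1B_KD_lora_distributed

# Qwen2 1.5B -> 0.5B KD with LoRA
tune run knowledge_distillation_single_device --config qwen2/1.5_to_0.5B_KD_lora_single_device
tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed --config qwen2/1.5_to_0.5B_KD_lora_distributed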
8 changes: 4 additions & 4 deletions tests/recipes/test_knowledge_distillation_distributed.py
@@ -62,7 +62,7 @@ def test_loss(self, tmpdir, monkeypatch):

        cmd = f"""
        tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed \
-            --config llama3_2/8B_to_1B_KD_distributed \
+            --config llama3_2/8B_to_1B_KD_lora_distributed \
            output_dir={tmpdir} \
            checkpointer._component_=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
@@ -120,7 +120,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
        # Train for two epochs
        cmd_1 = f"""
        tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed \
-            --config llama3_2/8B_to_1B_KD_distributed \
+            --config llama3_2/8B_to_1B_KD_lora_distributed \
            output_dir={tmpdir} \
            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
@@ -148,7 +148,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
        # Resume training
        cmd_2 = f"""
        tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed \
-            --config llama3_2/8B_to_1B_KD_distributed \
+            --config llama3_2/8B_to_1B_KD_lora_distributed \
            output_dir={tmpdir} \
            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir={tmpdir} \
@@ -199,7 +199,7 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):

        cmd = f"""
        tune run --nnodes 1 --nproc_per_node 2 knowledge_distillation_distributed \
-            --config llama3_2/8B_to_1B_KD_distributed \
+            --config llama3_2/8B_to_1B_KD_lora_distributed \
            output_dir={tmpdir} \
            checkpointer._component_={ckpt_component} \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
8 changes: 4 additions & 4 deletions tests/recipes/test_knowledge_distillation_single_device.py
@@ -65,7 +65,7 @@ def test_loss(
        tmpdir,
        monkeypatch,
    ):
-        config = "qwen2/1.5_to_0.5B_KD_single_device"
+        config = "qwen2/1.5_to_0.5B_KD_lora_single_device"
        model_type = "llama3"
        ckpt_type = "tune"
        ckpt_component = CKPT_COMPONENT_MAP[ckpt_type]
@@ -152,7 +152,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
        # Train for two epochs
        cmd_1 = f"""
        tune run knowledge_distillation_single_device \
-            --config qwen2/1.5_to_0.5B_KD_single_device \
+            --config qwen2/1.5_to_0.5B_KD_lora_single_device \
            output_dir={tmpdir} \
            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
@@ -186,7 +186,7 @@ def test_training_state_on_resume(self, tmpdir, monkeypatch):
        # Resume training
        cmd_2 = f"""
        tune run knowledge_distillation_single_device \
-            --config qwen2/1.5_to_0.5B_KD_single_device \
+            --config qwen2/1.5_to_0.5B_KD_lora_single_device \
            output_dir={tmpdir} \
            checkpointer=torchtune.training.FullModelTorchTuneCheckpointer \
            checkpointer.checkpoint_dir={tmpdir} \
@@ -242,7 +242,7 @@ def test_save_and_load_merged_weights(self, tmpdir, monkeypatch):

        cmd = f"""
        tune run knowledge_distillation_single_device \
-            --config qwen2/1.5_to_0.5B_KD_single_device \
+            --config qwen2/1.5_to_0.5B_KD_lora_single_device \
            output_dir={tmpdir} \
            checkpointer._component_={ckpt_component} \
            checkpointer.checkpoint_dir='{ckpt_dir}' \
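To exercise the updated tests locally, something along these lines should work; this is a sketch, not part of the commit. It assumes a source checkout with dev dependencies installed, and note that these recipe tests expect test checkpoints to be available and may be gated behind the repo's integration-test marker or flag, so they can be skipped in a plain run:

pytest tests/recipes/test_knowledge_distillation_single_device.py -k test_loss
pytest tests/recipes/test_knowledge_distillation_distributed.py -k test_loss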
16 changes: 8 additions & 8 deletions torchtune/_recipe_registry.py
@@ -480,12 +480,12 @@ class Recipe:
        file_path="knowledge_distillation_single_device.py",
        configs=[
            Config(
-                name="qwen2/1.5_to_0.5B_KD_single_device",
-                file_path="qwen2/1.5_to_0.5B_KD_single_device.yaml",
+                name="qwen2/1.5_to_0.5B_KD_lora_single_device",
+                file_path="qwen2/1.5_to_0.5B_KD_lora_single_device.yaml",
            ),
            Config(
-                name="llama3_2/8B_to_1B_KD_single_device",
-                file_path="llama3_2/8B_to_1B_KD_single_device.yaml",
+                name="llama3_2/8B_to_1B_KD_lora_single_device",
+                file_path="llama3_2/8B_to_1B_KD_lora_single_device.yaml",
            ),
        ],
        supports_distributed=False,
@@ -495,12 +495,12 @@
        file_path="knowledge_distillation_distributed.py",
        configs=[
            Config(
-                name="qwen2/1.5_to_0.5B_KD_distributed",
-                file_path="qwen2/1.5_to_0.5B_KD_distributed.yaml",
+                name="qwen2/1.5_to_0.5B_KD_lora_distributed",
+                file_path="qwen2/1.5_to_0.5B_KD_lora_distributed.yaml",
            ),
            Config(
-                name="llama3_2/8B_to_1B_KD_distributed",
-                file_path="llama3_2/8B_to_1B_KD_distributed.yaml",
+                name="llama3_2/8B_to_1B_KD_lora_distributed",
+                file_path="llama3_2/8B_to_1B_KD_lora_distributed.yaml",
            ),
        ],
        supports_distributed=True,
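Because the registry entries changed, the new names are what the tune CLI should now report and resolve. A minimal sanity check, sketched under the assumption that tune ls and tune cp behave as in current torchtune releases (the destination filename below is just an example):

tune ls | grep KD_lora
tune cp llama3_2/8B_to_1B_KD_lora_single_device ./my_8B_to_1B_KD_lora.yaml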
