
Commit 86aaffe

do not land: local setup
Summary:
Test Plan:
1 parent 8248344 commit 86aaffe

3 files changed: +36 -4 lines


blog/llm-finetuning-4/finetune.py (+24)
@@ -6,6 +6,7 @@
 import evaluate
 import torch
 import transformers
+import wandb
 from determined.transformers import DetCallback
 from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model
 from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
@@ -173,6 +174,29 @@ def compute_metrics(eval_preds):
         distributed = det.core.DistributedContext.from_torch_distributed()

     with det.core.init(distributed=distributed) as core_context:
+        if core_context.distributed.rank == 0:
+            wandb.login(key=hparams["wandb_key"])
+            import uuid
+            # Generate a UUID
+            my_uuid = uuid.uuid4()
+            # Convert UUID to string
+            uuid_str = str(my_uuid)[:5]
+            r = hparams["r"]
+            lora_alpha = hparams["lora_alpha_in_r"]
+            lora_dropout = hparams["lora_dropout"]
+            dataset_subset = hparams["dataset_subset"]
+            run_name = f"test_lora_blog_{dataset_subset}_r_{r}_alpha_in_r_{lora_alpha}_dropout_{lora_dropout}_{uuid_str}"
+            run = wandb.init(
+                project="lora-blog",
+                name=run_name,
+                config={
+                    "r": hparams["r"],
+                    "lora_alpha_in_r": hparams["lora_alpha_in_r"],
+                    "dropout": hparams["lora_dropout"],
+                    "dataset_subset": hparams["dataset_subset"],
+                    "model": hparams["model"]
+                }
+            )
         det_callback = DetCallback(
             core_context,
             training_args,

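For context (not part of the commit): the run created on rank 0 above is the run the Hugging Face Trainer will log into once report_to: "wandb" is set in the training arguments (added to lora.yaml below), because the Trainer's wandb callback reuses an already-active run rather than starting a new one. A minimal standalone sketch of that pairing, with placeholder hyperparameter values and offline mode so it runs without an API key:

import uuid

import wandb
from transformers import TrainingArguments

# Placeholder values standing in for Determined's hparams; the keys mirror lora.yaml.
hparams = {
    "r": 8,
    "lora_alpha_in_r": 2,
    "lora_dropout": 0.1,
    "dataset_subset": "hard",
    "model": "mistralai/Mistral-7B-Instruct-v0.2",
}

# Short unique suffix, as in the commit, so each grid-search trial gets a distinct run name.
run_name = f"test_lora_blog_{hparams['dataset_subset']}_{str(uuid.uuid4())[:5]}"

# mode="offline" lets this sketch run without credentials; the commit calls
# wandb.login(key=...) with a real key instead.
run = wandb.init(project="lora-blog", name=run_name, config=hparams, mode="offline")

# With report_to="wandb", the Trainer's wandb callback logs metrics into the
# already-active run above rather than creating its own.
training_args = TrainingArguments(
    output_dir="/tmp/llm_finetuning",
    report_to="wandb",
    max_steps=5000,
    per_device_train_batch_size=4,
)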
blog/llm-finetuning-4/lora.yaml (+10 -4)
@@ -1,4 +1,4 @@
-name: mistral lora easy
+name: mistral lora hard
 debug: false
 environment:
   environment_variables:
@@ -7,7 +7,10 @@ environment:
     gpu: determinedai/environments:cuda-11.8-pytorch-2.0-gpu-95c7a14
     cpu: determinedai/environments:py-3.10-pytorch-2.0-cpu-03ae7d7
 resources:
-  slots_per_trial: 2
+  slots_per_trial: 4
+  resource_pool: a100
+workspace: swy_5
+project: lora-blog
 searcher:
   name: grid
   max_length:
@@ -17,7 +20,7 @@ searcher:
 hyperparameters:
   model: "mistralai/Mistral-7B-Instruct-v0.2"
   model_commit_hash: "99259002b41e116d28ccb2d04a9fbe22baed0c7f"
-  dataset_subset: "easy"
+  dataset_subset: "hard"
   lora: true
   # Tunable hyperparameters
   r:
@@ -30,10 +33,12 @@ hyperparameters:
     type: categorical
     vals: [0.1]
   # End tunable hyperparameters
+  hf_token: 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
+  wandb_key: f0180f6bb036d27ad08649574e991121a5192dbf
   training_args:
     output_dir: "/tmp/llm_finetuning"
     max_steps: 5000
-    per_device_train_batch_size: 8
+    per_device_train_batch_size: 4
     per_device_eval_batch_size: 4
     bf16: true
     evaluation_strategy: "steps"
@@ -43,6 +48,7 @@ hyperparameters:
     save_strategy: "steps"
     save_steps: 1000
     learning_rate: 1e-5
+    report_to: "wandb"
 entrypoint: >-
   python -m determined.launch.torch_distributed
   python finetune.py

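For context (not part of the commit): the grid searcher turns the categorical values under r, lora_alpha_in_r, and lora_dropout into one trial per combination, and finetune.py receives them through hparams (the same values it now echoes into the wandb run name and config). A minimal sketch of how one grid point could feed the peft LoraConfig already imported in finetune.py; reading lora_alpha_in_r as a multiplier on r is an assumption based on the key name, not something this diff shows:

from peft import LoraConfig

# One grid point from lora.yaml's categorical hyperparameters (placeholder values).
hparams = {"r": 8, "lora_alpha_in_r": 2, "lora_dropout": 0.1}

lora_config = LoraConfig(
    r=hparams["r"],
    # Assumption: "lora_alpha_in_r" expresses lora_alpha as a multiple of r.
    lora_alpha=hparams["lora_alpha_in_r"] * hparams["r"],
    lora_dropout=hparams["lora_dropout"],
    task_type="CAUSAL_LM",
)

get_peft_model(model, lora_config) would then wrap the base model, which is consistent with the existing imports shown in the finetune.py hunk above.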
blog/llm-finetuning-4/startup-hook.sh (+2)
@@ -1,3 +1,5 @@
 #!/bin/bash
 pip install --upgrade pip
 pip install -r requirements.txt
+# local requirements
+pip install wandb
