generated from allenai/python-package-template
-
Notifications
You must be signed in to change notification settings - Fork 397
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8f001bf
commit cf3b377
Showing 1 changed file with 49 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#!/usr/bin/env bash
#
# Submits a fine-tuning job through `gantry run`. Inside the job, the command
# sources scripts/beaker/${CLUSTER}-ib.sh, runs pdelfin.train.loaddataset, and
# then starts pdelfin.train.train via `accelerate launch`, both with the
# arguments in EXTRA_ARGS.
#
# Requires `jq`, `beaker` and `gantry` on PATH.

# -e: abort on errors, -u: abort on unset variables, -x: trace commands,
# pipefail: a failing `beaker` in `beaker | jq` is not hidden by jq's status.
set -euxo pipefail

# Check up front that every external tool this script invokes is installed.
for required_tool in jq beaker gantry; do
  if ! command -v "${required_tool}" &> /dev/null; then
    echo "${required_tool} could not be found. Please install it." >&2
    # Exit non-zero so callers can tell that nothing was submitted.
    exit 1
  fi
done

# Arguments shared by the dataset-loading step and the training step.
# \${BEAKER_USER_ID} is escaped on purpose: it stays literal in this script and
# is expanded by the shell started with `/bin/bash -c` below.
EXTRA_ARGS="-c pdelfin/train/config/molmo-7b-lora.yaml --num_proc 64 --save.path \"s3://ai2-oe-data/jakep/experiments/molmo-pdf/v1/models/\${BEAKER_USER_ID}\""

# The job description and task name are this script's file name without ".sh".
run_name=$(basename "$0" .sh)

# Resolve the Beaker account name in a plain assignment (not inline in the
# gantry argument list) so that a failure of `beaker` or `jq` aborts the script
# under `set -e` instead of silently producing an empty value.
beaker_user_id=$(beaker account whoami --format json | jq '.[0].name' -cr)
if [[ -z "${beaker_user_id}" || "${beaker_user_id}" == "null" ]]; then
  echo "Could not determine the Beaker user name from 'beaker account whoami'." >&2
  exit 1
fi

# Alternative options kept for reference:
#  --cluster 'ai2/jupiter*' \
#  --cluster 'ai2/pluto*' \
#  --cluster 'ai2/allennlp-cirrascale' \
#  --priority high \

# Used both for the --cluster pattern and for the ${CLUSTER}-ib.sh script that
# is sourced inside the job.
CLUSTER='jupiter'

gantry run \
  --description "${run_name}" \
  --task-name "${run_name}" \
  --allow-dirty \
  --host-networking \
  --workspace ai2/oe-data-model-based-cleanup \
  --beaker-image 'jakep/jakep-pdf-finetunev1.2' \
  --venv 'base' \
  --pip gantry-requirements.txt \
  --priority high \
  --gpus 8 \
  --preemptible \
  --cluster "ai2/${CLUSTER}*" \
  --budget ai2/oe-data \
  --weka "oe-data-default:/data" \
  --env LOG_FILTER_TYPE=local_rank0_only \
  --env OMP_NUM_THREADS=8 \
  --env "BEAKER_USER_ID=${beaker_user_id}" \
  --env-secret AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
  --env-secret AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
  --env-secret DS_AWS_ACCESS_KEY_ID=S2_AWS_ACCESS_KEY_ID \
  --env-secret DS_AWS_SECRET_ACCESS_KEY=S2_AWS_SECRET_ACCESS_KEY \
  --env-secret WANDB_API_KEY=JAKE_WANDB_API_KEY \
  --shared-memory 10GiB \
  --yes \
  -- /bin/bash -c "source scripts/beaker/${CLUSTER}-ib.sh && python -m pdelfin.train.loaddataset ${EXTRA_ARGS} && accelerate launch --multi_gpu --num_processes \${BEAKER_ASSIGNED_GPU_COUNT} --mixed_precision bf16 -m pdelfin.train.train ${EXTRA_ARGS}"