Update pull.yml to test snapshot saving and loading (CPU-only) (#1486)
* Update pull.yml to test snapshot saving and loading

test snapshot saving and loading

* Update pull.yml

Fixed typos.

* Update pull.yml

Use cuda-32.json, because somebody would rather fail a job than accept a partial group (a sketch of the assumed config follows the first diff hunk below).

* Update pull.yml

Remove the fp16 and fp32 int4-quantized models for now.
@jerryzh168 Not sure why these dtypes are not compatible with int4 quantization? (A dtype probe sketch follows the float16 hunk below.)

* Update pull.yml

Add a DEVICE specification for the snapshot test and use device cpu (a minimal round trip is sketched below).
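
The change under test is a save/reload round trip: torchchat.py export writes a quantized snapshot with --output-snap, and torchchat.py generate reloads it with --snap. A minimal local reproduction, using only the flags that appear in the diff below (REPO_NAME is the workflow's placeholder for the model repository):

    # Save a bfloat16, int4-quantized snapshot on CPU, then reload it for generation.
    CHECKPOINT="./checkpoints/${REPO_NAME}/model.pth"
    python3 torchchat.py export --device cpu --output-snap model.tc --dtype bfloat16 \
        --quantize torchchat/quant_config/cuda-32.json --checkpoint "${CHECKPOINT}"
    python3 torchchat.py generate --device cpu --snap model.tc --dtype bfloat16 \
        --checkpoint "${CHECKPOINT}"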
mikekgfb authored Feb 18, 2025
1 parent b57b2be commit 384a728
Showing 1 changed file with 25 additions and 4 deletions.
29 changes: 25 additions & 4 deletions .github/workflows/pull.yml
@@ -291,6 +291,16 @@ jobs:
bash .ci/scripts/validate.sh "./checkpoints/${REPO_NAME}/model.pth" "cuda" "aoti-bfloat16"
echo "::endgroup::"
echo "::group::Run inference with quantize file"
for DEVICE in cpu; do # cuda
  # cuda - fails because `AttributeError: 'Linear' object has no attribute '_linear_extra_repr'`
  # follow up with torchao as a separate PR
  echo "saving snapshot for device ${DEVICE} and dtype bfloat16, and reloading as snapshot"
  python3 torchchat.py export --device ${DEVICE} --output-snap model.tc --dtype bfloat16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
  python3 torchchat.py generate --device ${DEVICE} --snap model.tc --dtype bfloat16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
done
echo "::endgroup::"
test-gpu-aoti-float32:
permissions:
id-token: write
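
About the -32 suffix: this commit does not show the contents of torchchat/quant_config/cuda-32.json, but by analogy with the sibling cuda.json it presumably pins the int4 group size to 32. The keys and values below are assumptions for illustration, not the file's verified contents:

    {
        "executor": {"accelerator": "cuda"},
        "precision": {"dtype": "bf16"},
        "linear:int4": {"groupsize": 32}
    }

Int4 weight-only quantization shares one scale per fixed-size group of weights along each row, so a group size that does not evenly divide a layer's input dimension leaves a partial group at the end, and the quantizer fails the job rather than accept it. A group size of 32 divides more layer dimensions evenly, which is presumably why cuda-32.json is used here and why mobile-32.json replaces mobile.json further down.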
@@ -335,6 +345,11 @@ jobs:
fi
echo "::endgroup::"
# echo "::group::Run inference with quantize file"
# python3 torchchat.py export --output-snap model.tc --dtype float32 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
# python3 torchchat.py generate --snap model.tc --dtype float32 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
# echo "::endgroup::"
test-gpu-aoti-float16:
permissions:
id-token: write
@@ -376,10 +391,15 @@ jobs:
echo "::group::Run inference with quantize file"
if [ $(uname -s) == Darwin ]; then
  python3 torchchat.py export --output-aoti-package-path /tmp/model.pt2 --quantize torchchat/quant_config/cuda.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
  python3 torchchat.py generate --aoti-package-path /tmp/model.pt2 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
fi
echo "::endgroup::"
# echo "::group::Run inference with quantize file"
# python3 torchchat.py export --output-snap model.tc --dtype float16 --quantize torchchat/quant_config/cuda-32.json --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
# python3 torchchat.py generate --snap model.tc --dtype float16 --checkpoint "./checkpoints/${REPO_NAME}/model.pth"
# echo "::endgroup::"
test-gpu-eval-sanity-check:
permissions:
id-token: write
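
The float16 and float32 snapshot tests stay commented out pending the int4 dtype question raised in the commit message. If someone wants to check which dtypes the int4 path currently accepts, a throwaway probe along these lines would do; this is a sketch reusing the exact flags from the enabled bfloat16 test, not part of the workflow:

    # Hypothetical probe: try each dtype against the int4 config and report the outcome.
    for DTYPE in bfloat16 float16 float32; do
        if python3 torchchat.py export --device cpu --output-snap "/tmp/model-${DTYPE}.tc" \
                --dtype "${DTYPE}" --quantize torchchat/quant_config/cuda-32.json \
                --checkpoint "./checkpoints/${REPO_NAME}/model.pth"; then
            echo "int4 + ${DTYPE}: export succeeded"
        else
            echo "int4 + ${DTYPE}: export failed"
        fi
    done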
@@ -495,10 +515,11 @@ jobs:
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
echo "******************************************"
echo "*** --quantize torchchat/quant_config/mobile.json ***"
echo "*** can't test --quantize torchchat/quant_config/mobile.json ***"
echo "*** testing --quantize torchchat/quant_config/mobile-32.json ***"
echo "******************************************"
# python torchchat.py export --quantize torchchat/quant_config/mobile.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
# python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
python torchchat.py export --quantize torchchat/quant_config/mobile-32.json --checkpoint-path ${MODEL_PATH} --output-pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
python3 torchchat.py generate --checkpoint-path ${MODEL_PATH} --temperature 0 --pte-path ${MODEL_DIR}/${MODEL_NAME}.pte
echo "******************************************"
