Skip to content

Commit

Permalink
Try 4 GPU test
Browse files Browse the repository at this point in the history
  • Loading branch information
DarkLight1337 committed Sep 27, 2024
1 parent 1294396 commit 11604f0
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
6 changes: 3 additions & 3 deletions .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ steps:
commands:
- pytest -v -s distributed/test_pynccl.py
- pytest -v -s spec_decode/e2e/test_integration_dist_tp4.py
- pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed_4_gpus

- label: Metrics, Tracing Test # 10min
num_gpus: 2
Expand Down Expand Up @@ -333,13 +334,13 @@ steps:
- label: Custom Models Test
#mirror_hardwares: [amd]
working_dir: "/vllm-workspace/tests"
num_gpus: 2
num_gpus: 4
optional: true
commands:
# PR authors can temporarily add commands below to test individual models
# e.g. pytest -v -s models/encoder_decoder/vision_language/test_mllama.py
# *To avoid merge conflicts, remember to REMOVE (not just comment out) them before merging the PR*
- pytest -v -s models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
- pytest -v -s models/encoder_decoder/vision_language/test_broadcast.py -m distributed_4_gpus

##### 1 GPU test #####
##### multi gpus test #####
Expand Down Expand Up @@ -391,7 +392,6 @@ steps:
# Avoid importing model tests that cause CUDA reinitialization error
- pytest models/encoder_decoder/language/test_bart.py -v -s -m distributed_2_gpus
- pytest models/decoder_only/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
- pytest models/encoder_decoder/vision_language/test_broadcast.py -v -s -m distributed_2_gpus
- pytest -v -s spec_decode/e2e/test_integration_dist_tp2.py
- pip install -e ./plugins/vllm_add_dummy_model
- pytest -v -s distributed/test_distributed_oot.py
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ....utils import multi_gpu_test


@multi_gpu_test(num_gpus=2)
@multi_gpu_test(num_gpus=4)
@pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
@pytest.mark.parametrize("model", [
"meta-llama/Llama-3.2-11B-Vision-Instruct",
Expand All @@ -14,7 +14,7 @@ def test_models(hf_runner, vllm_runner, image_assets,
dtype = "half"
max_tokens = 5
num_logprobs = 5
tensor_parallel_size = 2
tensor_parallel_size = 4

if model.startswith("meta-llama/Llama-3.2-11B-Vision-Instruct"):
from .test_mllama import models, run_test
Expand Down

0 comments on commit 11604f0

Please sign in to comment.