Patch summary
  * .buildkite/test-pipeline.yaml: adds `working_dir: "/vllm-workspace/tests"`
    to eight steps and an `amd_gpus` count (4, 2, 8, 8) to four steps that
    carry `mirror_hardwares: [amd]`.
  * .buildkite/test-template.j2: adds `--target test` to the Dockerfile.rocm
    `docker build` command.

NOTE(review): the source of this patch had all line breaks and leading
whitespace flattened. Records are restored one per line and every token is
unchanged, but the indentation inside hunk lines is reconstructed by
convention. Confirm it against the target files, or apply with
`git apply --ignore-whitespace`.

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 9991060a31621..aa2509f22f0cd 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -92,7 +92,9 @@ steps:
   - VLLM_ATTENTION_BACKEND=FLASH_ATTN pytest -v -s basic_correctness/test_chunked_prefill.py
 
 - label: Core Test # 10min
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
+  amd_gpus: 4 # Just for the sake of queue testing
   fast_check: true
   source_file_dependencies:
   - vllm/core
@@ -105,6 +107,7 @@ steps:
   working_dir: "/vllm-workspace/tests"
   fast_check: true
   mirror_hardwares: [amd]
+  amd_gpus: 2 # Just for the sake of queue testing
   source_file_dependencies:
   - vllm/
   - tests/entrypoints/llm
@@ -186,6 +189,7 @@ steps:
   - pytest -v -s engine test_sequence.py test_config.py test_logger.py
   # OOM in the CI unless we run this separately
   - pytest -v -s tokenization
+  working_dir: "/vllm-workspace/tests" # optional
 
 - label: V1 Test
   #mirror_hardwares: [amd]
@@ -230,6 +234,7 @@ steps:
   - python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2
 
 - label: Prefix Caching Test # 9min
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/
@@ -248,6 +253,7 @@ steps:
   - VLLM_USE_FLASHINFER_SAMPLER=1 pytest -v -s samplers
 
 - label: LogitsProcessor Test # 5min
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
   source_file_dependencies:
   - vllm/model_executor/layers
@@ -269,7 +275,9 @@ steps:
   - pytest -v -s spec_decode/e2e/test_eagle_correctness.py
 
 - label: LoRA Test %N # 15min each
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
+  amd_gpus: 8
   source_file_dependencies:
   - vllm/lora
   - tests/lora
@@ -295,7 +303,9 @@ steps:
   - pytest -v -s compile/test_full_graph.py
 
 - label: Kernels Test %N # 1h each
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
+  amd_gpus: 8
   source_file_dependencies:
   - csrc/
   - vllm/attention
@@ -305,6 +315,7 @@ steps:
   parallelism: 4
 
 - label: Tensorizer Test # 11min
+  working_dir: "/vllm-workspace/tests"
   mirror_hardwares: [amd]
   soft_fail: true
   source_file_dependencies:
@@ -355,6 +366,7 @@ steps:
   - pytest -v -s encoder_decoder
 
 - label: OpenAI-Compatible Tool Use # 20 min
+  working_dir: "/vllm-workspace/tests"
   fast_check: false
   mirror_hardwares: [ amd ]
   source_file_dependencies:
diff --git a/.buildkite/test-template.j2 b/.buildkite/test-template.j2
index ce448836a8278..573675d67f86b 100644
--- a/.buildkite/test-template.j2
+++ b/.buildkite/test-template.j2
@@ -7,7 +7,7 @@ steps:
   - label: ":docker: build image"
     depends_on: ~
     commands:
-      - "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --progress plain ."
+      - "docker build --build-arg max_jobs=16 --tag {{ docker_image_amd }} -f Dockerfile.rocm --target test --progress plain ."
       - "docker push {{ docker_image_amd }}"
     key: "amd-build"
     env: