Update CI - Bump vllm to v0.4.2 (#43)
pskiran1 authored May 29, 2024
1 parent 861a198 · commit f77614e
Showing 2 changed files with 7 additions and 9 deletions.
ci/L0_multi_gpu/vllm_backend/test.sh (2 changes: 0 additions & 2 deletions)
@@ -42,8 +42,6 @@ rm -rf models && mkdir -p models
 cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
 sed -i '3s/^/ "tensor_parallel_size": 2,\n/' models/vllm_opt/1/model.json
 
-python3 -m pip install --upgrade pip && pip3 install tritonclient[grpc] nvidia-ml-py3
-
 RET=0
 
 run_server
ci/L0_multi_gpu/vllm_backend/vllm_multi_gpu_test.py (14 changes: 7 additions & 7 deletions)
@@ -28,7 +28,7 @@
 import unittest
 from functools import partial
 
-import nvidia_smi
+import pynvml
 import tritonclient.grpc as grpcclient
 from tritonclient.utils import *
 
@@ -38,20 +38,20 @@
 
 class VLLMMultiGPUTest(TestResultCollector):
     def setUp(self):
-        nvidia_smi.nvmlInit()
+        pynvml.nvmlInit()
         self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
         self.vllm_model_name = "vllm_opt"
 
     def get_gpu_memory_utilization(self, gpu_id):
-        handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_id)
-        info = nvidia_smi.nvmlDeviceGetMemoryInfo(handle)
+        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
+        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
         return info.used
 
     def get_available_gpu_ids(self):
-        device_count = nvidia_smi.nvmlDeviceGetCount()
+        device_count = pynvml.nvmlDeviceGetCount()
         available_gpus = []
         for gpu_id in range(device_count):
-            handle = nvidia_smi.nvmlDeviceGetHandleByIndex(gpu_id)
+            handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
             if handle:
                 available_gpus.append(gpu_id)
         return available_gpus
@@ -119,7 +119,7 @@ def _test_vllm_model(self, send_parameters_as_tensor=True):
         self.triton_client.stop_stream()
 
     def tearDown(self):
-        nvidia_smi.nvmlShutdown()
+        pynvml.nvmlShutdown()
         self.triton_client.close()
 
 
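
For context, this commit swaps the nvidia_smi module (from the nvidia-ml-py3 package, whose explicit install is also dropped from test.sh above) for pynvml. Below is a minimal standalone sketch, not part of the commit, of the NVML calls the updated test relies on; it assumes a machine with an NVIDIA driver and a pynvml-providing package installed:

import pynvml

# Initialize NVML before making any other calls.
pynvml.nvmlInit()
try:
    # Enumerate GPUs and report per-device memory usage, mirroring
    # get_available_gpu_ids() and get_gpu_memory_utilization() in the test.
    for gpu_id in range(pynvml.nvmlDeviceGetCount()):
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        print(f"GPU {gpu_id}: {info.used / 1024**2:.0f} MiB used of {info.total / 1024**2:.0f} MiB")
finally:
    # Always release NVML resources, as tearDown() does.
    pynvml.nvmlShutdown()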
