From 83da80203620e23e285fdedf8501b95dbc2be963 Mon Sep 17 00:00:00 2001 From: Chenguang Li <757486878@qq.com> Date: Mon, 13 Jan 2025 15:47:05 +0800 Subject: [PATCH] [Misc]Minor Changes about Worker (#11555) Signed-off-by: Chenguang Li <757486878@qq.com> --- vllm/v1/worker/gpu_worker.py | 1 - vllm/worker/worker.py | 1 - 2 files changed, 2 deletions(-) diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index af438f7d5820c..e83bce4283555 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -132,7 +132,6 @@ def determine_num_available_blocks(self) -> Tuple[int, int]: # Execute a forward pass with dummy inputs to profile the memory usage # of the model. self.model_runner.profile_run() - torch.cuda.synchronize() free_gpu_memory, _ = torch.cuda.mem_get_info() # NOTE(woosuk): Here we assume that the other processes using the same diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py index 0f12549e3f3fd..a3e377ef2b19d 100644 --- a/vllm/worker/worker.py +++ b/vllm/worker/worker.py @@ -200,7 +200,6 @@ def determine_num_available_blocks(self) -> Tuple[int, int]: weights_memory_in_bytes=self.model_runner. model_memory_usage) as result: self.model_runner.profile_run() - torch.cuda.synchronize() self._assert_memory_footprint_increased_during_profiling()