From 83da80203620e23e285fdedf8501b95dbc2be963 Mon Sep 17 00:00:00 2001
From: Chenguang Li <757486878@qq.com>
Date: Mon, 13 Jan 2025 15:47:05 +0800
Subject: [PATCH] [Misc] Remove torch.cuda.synchronize() after profile_run in workers (#11555)

Signed-off-by: Chenguang Li <757486878@qq.com>
---
 vllm/v1/worker/gpu_worker.py | 1 -
 vllm/worker/worker.py        | 1 -
 2 files changed, 2 deletions(-)

diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index af438f7d5820c..e83bce4283555 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -132,7 +132,6 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
         # Execute a forward pass with dummy inputs to profile the memory usage
         # of the model.
         self.model_runner.profile_run()
-        torch.cuda.synchronize()
 
         free_gpu_memory, _ = torch.cuda.mem_get_info()
         # NOTE(woosuk): Here we assume that the other processes using the same
diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
index 0f12549e3f3fd..a3e377ef2b19d 100644
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -200,7 +200,6 @@ def determine_num_available_blocks(self) -> Tuple[int, int]:
                               weights_memory_in_bytes=self.model_runner.
                               model_memory_usage) as result:
             self.model_runner.profile_run()
-            torch.cuda.synchronize()
 
         self._assert_memory_footprint_increased_during_profiling()