diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index 75e3c67c5563e..6f5adb4f64728 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -109,7 +109,7 @@ def initialize_cache(self, num_gpu_blocks: int, num_cpu_blocks) -> None: """ # NOTE: This is logged in the executor because there can be >1 workers. logger.info("# %s blocks: %d, # CPU blocks: %d", - vllm.platforms.current_platform.dispatch_key, + vllm.platforms.current_platform.device_name, num_gpu_blocks, num_cpu_blocks) max_concurrency = (num_gpu_blocks * self.cache_config.block_size / self.model_config.max_model_len) diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 58948ad1aba0a..d6dae2e526dc6 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -327,7 +327,7 @@ def get_device_communicator_cls(cls) -> str: """ Get device specific communicator class for distributed communication. """ - return "vllm.distributed.device_communicator.base_device_communicator.DeviceCommunicatorBase" # noqa + return "vllm.distributed.device_communicators.base_device_communicator.DeviceCommunicatorBase" # noqa class UnspecifiedPlatform(Platform):