diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 5b73ef08f9d84..0d836a1fb13a9 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -236,7 +236,6 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", model_config = copy.deepcopy(self.model_config) parallel_config = copy.deepcopy(self.parallel_config) scheduler_config = copy.deepcopy(self.scheduler_config) - cache_config = copy.deepcopy(self.cache_config) for rank, (worker, (node_id, _)) in enumerate(zip(self.workers, @@ -252,7 +251,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", rank, distributed_init_method, lora_config=self.lora_config, - cache_config=cache_config, + kv_cache_dtype=self.cache_config.cache_dtype, )) driver_rank = 0 @@ -265,7 +264,7 @@ def _init_workers_ray(self, placement_group: "PlacementGroup", driver_rank, distributed_init_method, lora_config=self.lora_config, - cache_config=cache_config, + kv_cache_dtype=self.cache_config.cache_dtype, is_driver_worker=True, )