diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 5b73ef08..0d836a1f 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -236,7 +236,6 @@ class LLMEngine: model_config = copy.deepcopy(self.model_config) parallel_config = copy.deepcopy(self.parallel_config) scheduler_config = copy.deepcopy(self.scheduler_config) - cache_config = copy.deepcopy(self.cache_config) for rank, (worker, (node_id, _)) in enumerate(zip(self.workers, @@ -252,7 +251,7 @@ class LLMEngine: rank, distributed_init_method, lora_config=self.lora_config, - cache_config=cache_config, + kv_cache_dtype=self.cache_config.cache_dtype, )) driver_rank = 0 @@ -265,7 +264,7 @@ class LLMEngine: driver_rank, distributed_init_method, lora_config=self.lora_config, - cache_config=cache_config, + kv_cache_dtype=self.cache_config.cache_dtype, is_driver_worker=True, )