From c1c0d00b88320f97e00a3175fac235a232893da5 Mon Sep 17 00:00:00 2001 From: Roy Date: Tue, 27 Feb 2024 09:33:38 +0800 Subject: [PATCH] Don't use cupy when `enforce_eager=True` (#3037) --- vllm/engine/llm_engine.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index c1a75924..f5b2145c 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -284,7 +284,10 @@ class LLMEngine: is_driver_worker=True, ) - self._run_workers("init_model", cupy_port=get_open_port()) + # don't use cupy for eager mode + self._run_workers("init_model", + cupy_port=get_open_port() + if not model_config.enforce_eager else None) self._run_workers( "load_model", max_concurrent_workers=self.parallel_config.