Shut down the model executor when the engine or the executor is finalized.

Summary of the two hunks below:
  * vllm/engine/llm_engine.py: adds LLMEngine.__del__, which looks up
    "model_executor" with getattr (default None) and, when the value is
    truthy, calls its shutdown() method. The getattr lookup is used because
    __init__ can fail before the attribute is assigned.
  * vllm/executor/executor_base.py: adds ExecutorBase.shutdown(), whose base
    implementation just returns, and ExecutorBase.__del__, which calls
    self.shutdown().

Note for reviewers: blank context lines inside a hunk consist of a single
space character; keep them when editing this patch by hand.

diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 56c2417d..7de60d73 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -287,6 +287,12 @@ class LLMEngine:
         # the closure used to initialize Ray worker actors
         raise RuntimeError("LLMEngine should not be pickled!")
 
+    def __del__(self):
+        # Shutdown model executor when engine is garbage collected
+        # Use getattr since __init__ can fail before the field is set
+        if model_executor := getattr(self, "model_executor", None):
+            model_executor.shutdown()
+
     def get_tokenizer(self) -> "PreTrainedTokenizer":
         return self.tokenizer.get_lora_tokenizer(None)
 
diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py
index 1839b560..1838c34b 100644
--- a/vllm/executor/executor_base.py
+++ b/vllm/executor/executor_base.py
@@ -95,6 +95,13 @@ class ExecutorBase(ABC):
         exception."""
         raise NotImplementedError
 
+    def shutdown(self) -> None:
+        """Shutdown the executor."""
+        return
+
+    def __del__(self):
+        self.shutdown()
+
 
 class ExecutorAsyncBase(ExecutorBase):
 