From 546b101fa05043feb470513a778c31114ea3aa05 Mon Sep 17 00:00:00 2001
From: pushan <62173185+pushan01@users.noreply.github.com>
Date: Thu, 11 Jul 2024 21:46:31 +0800
Subject: [PATCH] [BugFix]: fix engine timeout due to request abort (#6255)

Signed-off-by: yatta zhang
Signed-off-by: zhangyuntao.dev
Co-authored-by: zhangyuntao.dev
---
 vllm/engine/async_llm_engine.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 9b4ef48b..f3c8d69e 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -553,11 +553,13 @@ class AsyncLLMEngine:
             request_outputs = await self.engine.step_async(virtual_engine)
 
         # Put the outputs into the corresponding streams.
+        finished = True
         for request_output in request_outputs:
             self._request_tracker.process_request_output(
                 request_output, verbose=self.log_requests)
+            finished = finished and request_output.finished
 
-        return len(request_outputs) > 0
+        return not finished
 
     async def _engine_abort(self, request_ids: Iterable[str]):
         if self.engine_use_ray: