From 546b101fa05043feb470513a778c31114ea3aa05 Mon Sep 17 00:00:00 2001
From: pushan <62173185+pushan01@users.noreply.github.com>
Date: Thu, 11 Jul 2024 21:46:31 +0800
Subject: [PATCH] [BugFix]: fix engine timeout due to request abort (#6255)

Signed-off-by: yatta zhang <ytzhang01@foxmail.com>
Signed-off-by: zhangyuntao.dev <zhangyuntao.dev@bytedance.com>
Co-authored-by: zhangyuntao.dev <zhangyuntao.dev@bytedance.com>
---
 vllm/engine/async_llm_engine.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 9b4ef48b..f3c8d69e 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -553,11 +553,13 @@ class AsyncLLMEngine:
             request_outputs = await self.engine.step_async(virtual_engine)
 
         # Put the outputs into the corresponding streams.
+        finished = True
         for request_output in request_outputs:
             self._request_tracker.process_request_output(
                 request_output, verbose=self.log_requests)
+            finished = finished and request_output.finished
 
-        return len(request_outputs) > 0
+        return not finished
 
     async def _engine_abort(self, request_ids: Iterable[str]):
         if self.engine_use_ray: