diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 9b4ef48b..f3c8d69e 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -553,11 +553,13 @@ class AsyncLLMEngine: request_outputs = await self.engine.step_async(virtual_engine) # Put the outputs into the corresponding streams. + finished = True for request_output in request_outputs: self._request_tracker.process_request_output( request_output, verbose=self.log_requests) + finished = finished and request_output.finished - return len(request_outputs) > 0 + return not finished async def _engine_abort(self, request_ids: Iterable[str]): if self.engine_use_ray: