From c87ebc3ef9ae6e8d6babbca782510ff924b3abc7 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Mon, 1 Jul 2024 16:17:58 -0700 Subject: [PATCH] [BugFix] Ensure worker model loop is always stopped at the right time (#5987) --- vllm/engine/llm_engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index f7e38c0e..5886ebc2 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -838,7 +838,7 @@ class LLMEngine: # Tracing self.do_tracing(scheduler_outputs) - if not request_outputs: + if not self.has_unfinished_requests(): # Stop the execute model loop in parallel workers until there are # more requests to process. This avoids waiting indefinitely in # torch.distributed ops which may otherwise timeout, and unblocks