From 3f8d42c81fe8d3842a9e05c9f5d98290b7f79736 Mon Sep 17 00:00:00 2001 From: Travis Johnson Date: Fri, 19 Jul 2024 20:18:19 -0600 Subject: [PATCH] Pipeline Parallel: Guard for KeyErrors at request abort (#6587) Signed-off-by: Travis Johnson --- vllm/engine/async_llm_engine.py | 5 ++++- vllm/engine/output_processor/single_step.py | 6 +++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 73e3c5de..7db06d80 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -131,7 +131,10 @@ class RequestTracker: """Process a request output from the engine.""" request_id = request_output.request_id - self._request_streams[request_id].put(request_output) + # Guard against a KeyError which can occur if the request was aborted + # while the output was generated + if (stream := self._request_streams.get(request_id)) is not None: + stream.put(request_output) if request_output.finished: if verbose: logger.info("Finished request %s.", request_id) diff --git a/vllm/engine/output_processor/single_step.py b/vllm/engine/output_processor/single_step.py index 4851897d..59eb4bc4 100644 --- a/vllm/engine/output_processor/single_step.py +++ b/vllm/engine/output_processor/single_step.py @@ -90,7 +90,11 @@ class SingleStepOutputProcessor(SequenceGroupOutputProcessor): for parent_seq in parent_seqs } for sample in samples: - parent_child_dict[sample.parent_seq_id].append(sample) + # Guard against a KeyError which can occur if the request was + # aborted while the output was generated + if (child_list := + parent_child_dict.get(sample.parent_seq_id)) is not None: + child_list.append(sample) # List of (child, parent) child_seqs: List[Tuple[Sequence, Sequence]] = []