diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 73e3c5de..7db06d80 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -131,7 +131,10 @@ class RequestTracker: """Process a request output from the engine.""" request_id = request_output.request_id - self._request_streams[request_id].put(request_output) + # Guard against a KeyError which can occur if the request was aborted + # while the output was generated + if (stream := self._request_streams.get(request_id)) is not None: + stream.put(request_output) if request_output.finished: if verbose: logger.info("Finished request %s.", request_id) diff --git a/vllm/engine/output_processor/single_step.py b/vllm/engine/output_processor/single_step.py index 4851897d..59eb4bc4 100644 --- a/vllm/engine/output_processor/single_step.py +++ b/vllm/engine/output_processor/single_step.py @@ -90,7 +90,11 @@ class SingleStepOutputProcessor(SequenceGroupOutputProcessor): for parent_seq in parent_seqs } for sample in samples: - parent_child_dict[sample.parent_seq_id].append(sample) + # Guard against a KeyError which can occur if the request was + # aborted while the output was generated + if (child_list := + parent_child_dict.get(sample.parent_seq_id)) is not None: + child_list.append(sample) # List of (child, parent) child_seqs: List[Tuple[Sequence, Sequence]] = []