From 3f8d42c81fe8d3842a9e05c9f5d98290b7f79736 Mon Sep 17 00:00:00 2001
From: Travis Johnson <tsjohnso@us.ibm.com>
Date: Fri, 19 Jul 2024 20:18:19 -0600
Subject: [PATCH] Pipeline Parallel: Guard for KeyErrors at request abort
 (#6587)

Signed-off-by: Travis Johnson <tsjohnso@us.ibm.com>
---
 vllm/engine/async_llm_engine.py             | 5 ++++-
 vllm/engine/output_processor/single_step.py | 6 +++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
index 73e3c5de..7db06d80 100644
--- a/vllm/engine/async_llm_engine.py
+++ b/vllm/engine/async_llm_engine.py
@@ -131,7 +131,10 @@ class RequestTracker:
         """Process a request output from the engine."""
         request_id = request_output.request_id
 
-        self._request_streams[request_id].put(request_output)
+        # Guard against a KeyError which can occur if the request was aborted
+        # while the output was generated
+        if (stream := self._request_streams.get(request_id)) is not None:
+            stream.put(request_output)
         if request_output.finished:
             if verbose:
                 logger.info("Finished request %s.", request_id)
diff --git a/vllm/engine/output_processor/single_step.py b/vllm/engine/output_processor/single_step.py
index 4851897d..59eb4bc4 100644
--- a/vllm/engine/output_processor/single_step.py
+++ b/vllm/engine/output_processor/single_step.py
@@ -90,7 +90,11 @@ class SingleStepOutputProcessor(SequenceGroupOutputProcessor):
             for parent_seq in parent_seqs
         }
         for sample in samples:
-            parent_child_dict[sample.parent_seq_id].append(sample)
+            # Guard against a KeyError which can occur if the request was
+            # aborted while the output was generated
+            if (child_list :=
+                    parent_child_dict.get(sample.parent_seq_id)) is not None:
+                child_list.append(sample)
         # List of (child, parent)
         child_seqs: List[Tuple[Sequence, Sequence]] = []