diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index 5c217f6f..9fe01a14 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -104,21 +104,24 @@ class Scheduler:
             request_id = (request_id, )
         request_ids = set(request_id)
         for state_queue in [self.waiting, self.running, self.swapped]:
-            # We need to reverse the list as we are removing elements
-            # from it as we iterate over it. If we don't do it,
-            # indices will get messed up and we will skip over elements.
-            for seq_group in reversed(state_queue):
+            aborted_groups = []
+            for seq_group in state_queue:
+                if not request_ids:
+                    # All requested groups were found; `break` (not `return`)
+                    # so the groups collected so far are still freed below.
+                    break
                 if seq_group.request_id in request_ids:
-                    # Remove the sequence group from the state queue.
-                    state_queue.remove(seq_group)
-                    for seq in seq_group.get_seqs():
-                        if seq.is_finished():
-                            continue
-                        seq.status = SequenceStatus.FINISHED_ABORTED
-                        self.free_seq(seq)
+                    # Defer removal: do not mutate state_queue mid-iteration.
+                    aborted_groups.append(seq_group)
                     request_ids.remove(seq_group.request_id)
-                    if not request_ids:
-                        return
+            for aborted_group in aborted_groups:
+                # Remove the group, then free its own unfinished sequences.
+                state_queue.remove(aborted_group)
+                for seq in aborted_group.get_seqs():
+                    if seq.is_finished():
+                        continue
+                    seq.status = SequenceStatus.FINISHED_ABORTED
+                    self.free_seq(seq)
 
     def has_unfinished_seqs(self) -> bool:
         return self.waiting or self.running or self.swapped