From a2a9869cb7e11a46c215e0cd55401509395c035d Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 12 Feb 2023 08:25:05 +0000 Subject: [PATCH] SERVING -> RUNNING --- cacheflow/master/block_manager.py | 2 +- cacheflow/sequence.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cacheflow/master/block_manager.py b/cacheflow/master/block_manager.py index ab41ae38..3400cc5d 100644 --- a/cacheflow/master/block_manager.py +++ b/cacheflow/master/block_manager.py @@ -95,7 +95,7 @@ class BlockSpaceManager: # Simple heuristic: If there is at least one free block # for each sequence, we can append. num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks() - num_seqs = seq_group.num_seqs(status=SequenceStatus.SERVING) + num_seqs = seq_group.num_seqs(status=SequenceStatus.RUNNING) return num_seqs <= num_free_gpu_blocks def append(self, seq: Sequence) -> Optional[Tuple[int, int]]: diff --git a/cacheflow/sequence.py b/cacheflow/sequence.py index 172b0b6b..619ccd8b 100644 --- a/cacheflow/sequence.py +++ b/cacheflow/sequence.py @@ -7,7 +7,7 @@ from cacheflow.decoding import DecodingParams class SequenceStatus(enum.Enum): PENDING = enum.auto() - SERVING = enum.auto() + RUNNING = enum.auto() SWAPPED = enum.auto() FINISHED = enum.auto()