SERVING -> RUNNING
This commit is contained in:
parent
46958cf941
commit
a2a9869cb7
@ -95,7 +95,7 @@ class BlockSpaceManager:
|
|||||||
# Simple heuristic: If there is at least one free block
|
# Simple heuristic: If there is at least one free block
|
||||||
# for each sequence, we can append.
|
# for each sequence, we can append.
|
||||||
num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()
|
num_free_gpu_blocks = self.gpu_allocator.get_num_free_blocks()
|
||||||
num_seqs = seq_group.num_seqs(status=SequenceStatus.SERVING)
|
num_seqs = seq_group.num_seqs(status=SequenceStatus.RUNNING)
|
||||||
return num_seqs <= num_free_gpu_blocks
|
return num_seqs <= num_free_gpu_blocks
|
||||||
|
|
||||||
def append(self, seq: Sequence) -> Optional[Tuple[int, int]]:
|
def append(self, seq: Sequence) -> Optional[Tuple[int, int]]:
|
||||||
|
|||||||
@ -7,7 +7,7 @@ from cacheflow.decoding import DecodingParams
|
|||||||
|
|
||||||
class SequenceStatus(enum.Enum):
|
class SequenceStatus(enum.Enum):
|
||||||
PENDING = enum.auto()
|
PENDING = enum.auto()
|
||||||
SERVING = enum.auto()
|
RUNNING = enum.auto()
|
||||||
SWAPPED = enum.auto()
|
SWAPPED = enum.auto()
|
||||||
FINISHED = enum.auto()
|
FINISHED = enum.auto()
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user