Avoid sorting waiting queue & Minor code cleaning (#93)

This commit is contained in:
Woosuk Kwon 2023-05-10 01:57:07 -07:00 committed by GitHub
parent e331957784
commit ae356774ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 10 additions and 16 deletions

View File

@ -1,21 +1,16 @@
import enum import enum
import os
import pickle
import time import time
from typing import Any, Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
from cacheflow.core.block_manager import BlockSpaceManager from cacheflow.core.block_manager import BlockSpaceManager
from cacheflow.logger import init_logger from cacheflow.logger import init_logger
from cacheflow.core.policy import PolicyFactory from cacheflow.core.policy import PolicyFactory
from cacheflow.sampling_params import SamplingParams from cacheflow.sampling_params import SamplingParams
from cacheflow.sequence import Sequence from cacheflow.sequence import (Sequence, SequenceGroup, SequenceGroupMetadata,
from cacheflow.sequence import SequenceGroup SequenceOutputs, SequenceStatus)
from cacheflow.sequence import SequenceGroupMetadata
from cacheflow.sequence import SequenceOutputs
from cacheflow.sequence import SequenceStatus
logger = init_logger(__name__) logger = init_logger(__name__)
_LOGGING_INTERVAL_SEC = 10 _LOGGING_INTERVAL_SEC = 10
@ -129,7 +124,6 @@ class Scheduler:
# Swap in the sequence groups in the SWAPPED state if possible. # Swap in the sequence groups in the SWAPPED state if possible.
self.swapped = self.policy.sort_by_priority(now, self.swapped) self.swapped = self.policy.sort_by_priority(now, self.swapped)
# FCFS
while self.swapped and not blocks_to_swap_out: while self.swapped and not blocks_to_swap_out:
seq_group = self.swapped[0] seq_group = self.swapped[0]
# If the sequence group has been preempted in this step, stop. # If the sequence group has been preempted in this step, stop.
@ -162,7 +156,9 @@ class Scheduler:
# This is because we want to bound the amount of CPU memory taken by # This is because we want to bound the amount of CPU memory taken by
# the swapped sequence groups. # the swapped sequence groups.
if not self.swapped: if not self.swapped:
self.waiting = self.policy.sort_by_priority(now, self.waiting) # Optimization: We do not sort the waiting queue since the preempted
# sequence groups are added to the front and the new sequence groups
# are added to the back.
while self.waiting: while self.waiting:
seq_group = self.waiting[0] seq_group = self.waiting[0]
# If the sequence group has been preempted in this step, stop. # If the sequence group has been preempted in this step, stop.
@ -347,7 +343,6 @@ class Scheduler:
self.block_manager.allocate(seq_group) self.block_manager.allocate(seq_group)
for seq in seq_group.seqs: for seq in seq_group.seqs:
seq.status = SequenceStatus.RUNNING seq.status = SequenceStatus.RUNNING
# FIXME(woosuk): Support interactive generation.
if seq_group.group_id not in self.num_steps: if seq_group.group_id not in self.num_steps:
self.num_steps[seq_group.group_id] = 0 self.num_steps[seq_group.group_id] = 0
@ -404,7 +399,9 @@ class Scheduler:
for seq in seqs: for seq in seqs:
seq.status = SequenceStatus.WAITING seq.status = SequenceStatus.WAITING
self.block_manager.free(seq) self.block_manager.free(seq)
self.waiting.append(seq_group) # NOTE: For FCFS, we insert the preempted sequence group at the front
# of the waiting queue.
self.waiting.insert(0, seq_group)
def _preempt_by_swap( def _preempt_by_swap(
self, self,

View File

@ -17,7 +17,6 @@ from cacheflow.sequence import SequenceGroup
from cacheflow.utils import get_gpu_memory, get_cpu_memory from cacheflow.utils import get_gpu_memory, get_cpu_memory
from cacheflow.worker.controller import Controller, DeviceID from cacheflow.worker.controller import Controller, DeviceID
logger = init_logger(__name__) logger = init_logger(__name__)

View File

@ -7,7 +7,6 @@ from cacheflow.sampling_params import SamplingParams
from cacheflow.sequence import Sequence, SequenceGroup from cacheflow.sequence import Sequence, SequenceGroup
from cacheflow.utils import Counter from cacheflow.utils import Counter
logger = init_logger(__name__) logger = init_logger(__name__)

View File

@ -4,7 +4,6 @@ from transformers import AutoConfig
from cacheflow.logger import init_logger from cacheflow.logger import init_logger
from cacheflow.model_executor.utils import get_dtype_size from cacheflow.model_executor.utils import get_dtype_size
logger = init_logger(__name__) logger = init_logger(__name__)
_GiB = 1 << 30 _GiB = 1 << 30