Avoid sorting waiting queue & Minor code cleaning (#93)
This commit is contained in:
parent
e331957784
commit
ae356774ab
@ -1,21 +1,16 @@
|
|||||||
import enum
|
import enum
|
||||||
import os
|
|
||||||
import pickle
|
|
||||||
import time
|
import time
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
from typing import Dict, List, Optional, Tuple
|
||||||
|
|
||||||
from cacheflow.core.block_manager import BlockSpaceManager
|
from cacheflow.core.block_manager import BlockSpaceManager
|
||||||
from cacheflow.logger import init_logger
|
from cacheflow.logger import init_logger
|
||||||
from cacheflow.core.policy import PolicyFactory
|
from cacheflow.core.policy import PolicyFactory
|
||||||
from cacheflow.sampling_params import SamplingParams
|
from cacheflow.sampling_params import SamplingParams
|
||||||
from cacheflow.sequence import Sequence
|
from cacheflow.sequence import (Sequence, SequenceGroup, SequenceGroupMetadata,
|
||||||
from cacheflow.sequence import SequenceGroup
|
SequenceOutputs, SequenceStatus)
|
||||||
from cacheflow.sequence import SequenceGroupMetadata
|
|
||||||
from cacheflow.sequence import SequenceOutputs
|
|
||||||
from cacheflow.sequence import SequenceStatus
|
|
||||||
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
_LOGGING_INTERVAL_SEC = 10
|
_LOGGING_INTERVAL_SEC = 10
|
||||||
|
|
||||||
|
|
||||||
@ -129,7 +124,6 @@ class Scheduler:
|
|||||||
|
|
||||||
# Swap in the sequence groups in the SWAPPED state if possible.
|
# Swap in the sequence groups in the SWAPPED state if possible.
|
||||||
self.swapped = self.policy.sort_by_priority(now, self.swapped)
|
self.swapped = self.policy.sort_by_priority(now, self.swapped)
|
||||||
# FCFS
|
|
||||||
while self.swapped and not blocks_to_swap_out:
|
while self.swapped and not blocks_to_swap_out:
|
||||||
seq_group = self.swapped[0]
|
seq_group = self.swapped[0]
|
||||||
# If the sequence group has been preempted in this step, stop.
|
# If the sequence group has been preempted in this step, stop.
|
||||||
@ -162,7 +156,9 @@ class Scheduler:
|
|||||||
# This is because we want to bound the amount of CPU memory taken by
|
# This is because we want to bound the amount of CPU memory taken by
|
||||||
# the swapped sequence groups.
|
# the swapped sequence groups.
|
||||||
if not self.swapped:
|
if not self.swapped:
|
||||||
self.waiting = self.policy.sort_by_priority(now, self.waiting)
|
# Optimization: We do not sort the waiting queue since the preempted
|
||||||
|
# sequence groups are added to the front and the new sequence groups
|
||||||
|
# are added to the back.
|
||||||
while self.waiting:
|
while self.waiting:
|
||||||
seq_group = self.waiting[0]
|
seq_group = self.waiting[0]
|
||||||
# If the sequence group has been preempted in this step, stop.
|
# If the sequence group has been preempted in this step, stop.
|
||||||
@ -347,7 +343,6 @@ class Scheduler:
|
|||||||
self.block_manager.allocate(seq_group)
|
self.block_manager.allocate(seq_group)
|
||||||
for seq in seq_group.seqs:
|
for seq in seq_group.seqs:
|
||||||
seq.status = SequenceStatus.RUNNING
|
seq.status = SequenceStatus.RUNNING
|
||||||
# FIXME(woosuk): Support interactive generation.
|
|
||||||
if seq_group.group_id not in self.num_steps:
|
if seq_group.group_id not in self.num_steps:
|
||||||
self.num_steps[seq_group.group_id] = 0
|
self.num_steps[seq_group.group_id] = 0
|
||||||
|
|
||||||
@ -404,7 +399,9 @@ class Scheduler:
|
|||||||
for seq in seqs:
|
for seq in seqs:
|
||||||
seq.status = SequenceStatus.WAITING
|
seq.status = SequenceStatus.WAITING
|
||||||
self.block_manager.free(seq)
|
self.block_manager.free(seq)
|
||||||
self.waiting.append(seq_group)
|
# NOTE: For FCFS, we insert the preempted sequence group to the front
|
||||||
|
# of the waiting queue.
|
||||||
|
self.waiting.insert(0, seq_group)
|
||||||
|
|
||||||
def _preempt_by_swap(
|
def _preempt_by_swap(
|
||||||
self,
|
self,
|
||||||
|
|||||||
@ -17,7 +17,6 @@ from cacheflow.sequence import SequenceGroup
|
|||||||
from cacheflow.utils import get_gpu_memory, get_cpu_memory
|
from cacheflow.utils import get_gpu_memory, get_cpu_memory
|
||||||
from cacheflow.worker.controller import Controller, DeviceID
|
from cacheflow.worker.controller import Controller, DeviceID
|
||||||
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -7,7 +7,6 @@ from cacheflow.sampling_params import SamplingParams
|
|||||||
from cacheflow.sequence import Sequence, SequenceGroup
|
from cacheflow.sequence import Sequence, SequenceGroup
|
||||||
from cacheflow.utils import Counter
|
from cacheflow.utils import Counter
|
||||||
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -4,7 +4,6 @@ from transformers import AutoConfig
|
|||||||
from cacheflow.logger import init_logger
|
from cacheflow.logger import init_logger
|
||||||
from cacheflow.model_executor.utils import get_dtype_size
|
from cacheflow.model_executor.utils import get_dtype_size
|
||||||
|
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|
||||||
_GiB = 1 << 30
|
_GiB = 1 << 30
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user