[Misc] Fix typos in scheduler.py (#7285)
Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
This commit is contained in:
parent
e53dfd3eaf
commit
746709642c
@@ -678,7 +678,7 @@ class Scheduler:
|
|||||||
all tokens.
|
all tokens.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
SchedulerSwappedInOutputs.
|
SchedulerPrefillOutputs.
|
||||||
"""
|
"""
|
||||||
ignored_seq_groups: List[SequenceGroup] = []
|
ignored_seq_groups: List[SequenceGroup] = []
|
||||||
seq_groups: List[SequenceGroup] = []
|
seq_groups: List[SequenceGroup] = []
|
||||||
@@ -851,7 +851,7 @@ class Scheduler:
|
|||||||
preempted=preempted,
|
preempted=preempted,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _schedule_chunked_prefill(self):
|
def _schedule_chunked_prefill(self) -> SchedulerOutputs:
|
||||||
"""Schedule queued requests.
|
"""Schedule queued requests.
|
||||||
|
|
||||||
Chunked prefill allows to chunk prefill requests, batch them together
|
Chunked prefill allows to chunk prefill requests, batch them together
|
||||||
@@ -862,7 +862,7 @@ class Scheduler:
|
|||||||
|
|
||||||
The policy can sustain the high GPU utilization because it can put
|
The policy can sustain the high GPU utilization because it can put
|
||||||
prefill and decodes requests to the same batch, while it improves
|
prefill and decodes requests to the same batch, while it improves
|
||||||
inter token latency because decodes requests don't need to blocked
|
inter token latency because decodes requests don't need to be blocked
|
||||||
by prefill requests.
|
by prefill requests.
|
||||||
"""
|
"""
|
||||||
budget = SchedulingBudget(
|
budget = SchedulingBudget(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user