Add comments on swap space (#154)
This commit is contained in:
parent
dcda03b4cb
commit
3f92038b99
@@ -3,7 +3,8 @@
|
||||
On the server side, run one of the following commands:
|
||||
(vLLM backend)
|
||||
python -m vllm.entrypoints.api_server \
|
||||
--disable-log-requests --model <your_model>
|
||||
--model <your_model> --swap-space 16 \
|
||||
--disable-log-requests
|
||||
|
||||
(TGI backend)
|
||||
./launch_hf_server.sh <your_model>
|
||||
|
||||
@@ -409,7 +409,12 @@ class Scheduler:
|
||||
seq_group: SequenceGroup,
|
||||
blocks_to_swap_out: Dict[int, int],
|
||||
) -> None:
|
||||
assert self.block_manager.can_swap_out(seq_group)
|
||||
if not self.block_manager.can_swap_out(seq_group):
|
||||
# FIXME(woosuk): Abort the sequence group instead of aborting the
|
||||
# entire engine.
|
||||
raise RuntimeError(
|
||||
"Aborted due to the lack of CPU swap space. Please increase "
|
||||
"the swap space to avoid this error.")
|
||||
mapping = self.block_manager.swap_out(seq_group)
|
||||
blocks_to_swap_out.update(mapping)
|
||||
for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user