Fix a bug in swap_in and swap_out

This commit is contained in:
Woosuk Kwon 2023-02-14 02:12:58 +00:00
parent b1644f764f
commit 1f739f9b03

View File

@ -155,42 +155,60 @@ class BlockSpaceManager:
return len(blocks) + num_swapped_seqs <= num_free_blocks return len(blocks) + num_swapped_seqs <= num_free_blocks
def swap_in(self, seq_group: SequenceGroup) -> Dict[int, int]: def swap_in(self, seq_group: SequenceGroup) -> Dict[int, int]:
# src_block_number -> dst_block_number # CPU block -> GPU block.
mapping: Dict[int, int] = {} mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
for seq in seq_group.seqs: for seq in seq_group.seqs:
if seq.status == SequenceStatus.FINISHED: if seq.status == SequenceStatus.FINISHED:
continue continue
new_block_table: BlockTable = []
block_table = self.block_tables[seq.seq_id] block_table = self.block_tables[seq.seq_id]
for cpu_block in block_table: for cpu_block in block_table:
if cpu_block in mapping: if cpu_block in mapping:
new_block_table.append(mapping[cpu_block])
continue continue
gpu_block = self.gpu_allocator.allocate() gpu_block = self.gpu_allocator.allocate()
mapping[cpu_block.block_number] = gpu_block.block_number mapping[cpu_block] = gpu_block
# Free the CPU block swapped in to GPU. # Free the CPU block swapped in to GPU.
self.cpu_allocator.free(cpu_block) self.cpu_allocator.free(cpu_block)
return mapping self.block_tables[seq.seq_id] = new_block_table
block_number_mapping = {
cpu_block.block_number: gpu_block.block_number
for cpu_block, gpu_block in mapping.items()
}
return block_number_mapping
def can_swap_out(self, seq_group: SequenceGroup) -> bool: def can_swap_out(self, seq_group: SequenceGroup) -> bool:
blocks = self._get_physical_blocks(seq_group) blocks = self._get_physical_blocks(seq_group)
return len(blocks) <= self.cpu_allocator.get_num_free_blocks() return len(blocks) <= self.cpu_allocator.get_num_free_blocks()
def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]: def swap_out(self, seq_group: SequenceGroup) -> Dict[int, int]:
# src_block_number -> dst_block_number # GPU block -> CPU block.
mapping: Dict[int, int] = {} mapping: Dict[PhysicalTokenBlock, PhysicalTokenBlock] = {}
for seq in seq_group.seqs: for seq in seq_group.seqs:
if seq.status == SequenceStatus.FINISHED: if seq.status == SequenceStatus.FINISHED:
continue continue
new_block_table: BlockTable = []
block_table = self.block_tables[seq.seq_id] block_table = self.block_tables[seq.seq_id]
for gpu_block in block_table: for gpu_block in block_table:
if gpu_block.block_number in mapping: if gpu_block in mapping:
new_block_table.append(mapping[gpu_block])
continue continue
cpu_block = self.cpu_allocator.allocate() cpu_block = self.cpu_allocator.allocate()
mapping[gpu_block.block_number] = cpu_block.block_number new_block_table.append(cpu_block)
mapping[gpu_block] = cpu_block
# Free the GPU block swapped out to CPU. # Free the GPU block swapped out to CPU.
self.gpu_allocator.free(gpu_block) self.gpu_allocator.free(gpu_block)
return mapping self.block_tables[seq.seq_id] = new_block_table
block_number_mapping = {
gpu_block.block_number: cpu_block.block_number
for gpu_block, cpu_block in mapping.items()
}
return block_number_mapping
def _free_blocks(self, blocks: Iterable[PhysicalTokenBlock]) -> None: def _free_blocks(self, blocks: Iterable[PhysicalTokenBlock]) -> None:
for block in blocks: for block in blocks: