[PREFIX CACHING FOLLOW UP] OrderedDict-based evictor (#3431)

Co-authored-by: rsnm2 <rshaw@neuralmagic.com>
Co-authored-by: Luka <luka@paperspace>
This commit is contained in:
ElizaWszola 2024-03-21 07:20:04 +01:00 committed by GitHub
parent 523e30ea0c
commit 6ebd02bdef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1,5 +1,5 @@
import enum import enum
from typing import Dict from typing import OrderedDict
from abc import ABC, abstractmethod, abstractproperty from abc import ABC, abstractmethod, abstractproperty
from vllm.block import PhysicalTokenBlock from vllm.block import PhysicalTokenBlock
@@ -58,27 +58,26 @@ class LRUEvictor(Evictor):
""" """
def __init__(self): def __init__(self):
self.free_table: Dict[int, PhysicalTokenBlock] = {} self.free_table: OrderedDict[int, PhysicalTokenBlock] = OrderedDict()
def __contains__(self, block_hash: int) -> bool: def __contains__(self, block_hash: int) -> bool:
return block_hash in self.free_table return block_hash in self.free_table
# TODO: The performance of this evict function can be optimized further.
def evict(self) -> PhysicalTokenBlock: def evict(self) -> PhysicalTokenBlock:
if len(self.free_table) == 0: if len(self.free_table) == 0:
raise ValueError("No usable cache memory left") raise ValueError("No usable cache memory left")
free_blocks = self.free_table.values()
# Get evicted block evicted_block = next(iter(self.free_table.values()))
evicted_block: PhysicalTokenBlock = next(iter(free_blocks)) # The blocks with the lowest timestamps should be placed consecutively
# at the start of OrderedDict. Loop through all these blocks to
for block in free_blocks: # find the one with maximum number of hashed tokens.
if (block.last_accessed < evicted_block.last_accessed for _, block in self.free_table.items():
or block.last_accessed == evicted_block.last_accessed and if evicted_block.last_accessed < block.last_accessed:
block.num_hashed_tokens > evicted_block.num_hashed_tokens): break
if evicted_block.num_hashed_tokens < block.num_hashed_tokens:
evicted_block = block evicted_block = block
del self.free_table[evicted_block.block_hash] self.free_table.pop(evicted_block.block_hash)
evicted_block.computed = False evicted_block.computed = False
return evicted_block return evicted_block
@@ -91,7 +90,7 @@ class LRUEvictor(Evictor):
raise ValueError( raise ValueError(
"Attempting to remove block that's not in the evictor") "Attempting to remove block that's not in the evictor")
block: PhysicalTokenBlock = self.free_table[block_hash] block: PhysicalTokenBlock = self.free_table[block_hash]
del self.free_table[block_hash] self.free_table.pop(block_hash)
return block return block
@property @property