[Misc] Fix minor typo in scheduler (#8765)

This commit is contained in:
Woo-Yeon Lee 2024-09-25 16:53:03 +09:00 committed by GitHub
parent 3368c3ab36
commit 8fae5ed7f6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1554,14 +1554,14 @@ class Scheduler:
# the number of new tokens that is dividable by the block size # the number of new tokens that is dividable by the block size
# to avoid partial block matching. # to avoid partial block matching.
block_size = self.cache_config.block_size block_size = self.cache_config.block_size
reminder = budget.token_budget % block_size remainder = budget.token_budget % block_size
if reminder != 0: if remainder != 0:
raise ValueError("When enabling chunked prefill and " raise ValueError("When enabling chunked prefill and "
"prefix caching, max_num_batched_tokens " "prefix caching, max_num_batched_tokens "
"(chunk size) must be dividable by " "(chunk size) must be dividable by "
"block size, but got chunk_size " "block size, but got chunk_size "
f"({budget.token_budget}) % block_size " f"({budget.token_budget}) % block_size "
f"({block_size}) = {reminder}") f"({block_size}) = {remainder}")
if remaining_token_budget < num_new_tokens: if remaining_token_budget < num_new_tokens:
num_new_tokens = (remaining_token_budget // num_new_tokens = (remaining_token_budget //
block_size) * block_size block_size) * block_size