[Misc] Fix minor typo in scheduler (#8765)
This commit is contained in:
parent
3368c3ab36
commit
8fae5ed7f6
@@ -1554,14 +1554,14 @@ class Scheduler:
|
||||
# the number of new tokens that is dividable by the block size
|
||||
# to avoid partial block matching.
|
||||
block_size = self.cache_config.block_size
|
||||
reminder = budget.token_budget % block_size
|
||||
if reminder != 0:
|
||||
remainder = budget.token_budget % block_size
|
||||
if remainder != 0:
|
||||
raise ValueError("When enabling chunked prefill and "
|
||||
"prefix caching, max_num_batched_tokens "
|
||||
"(chunk size) must be dividable by "
|
||||
"block size, but got chunk_size "
|
||||
f"({budget.token_budget}) % block_size "
|
||||
f"({block_size}) = {reminder}")
|
||||
f"({block_size}) = {remainder}")
|
||||
if remaining_token_budget < num_new_tokens:
|
||||
num_new_tokens = (remaining_token_budget //
|
||||
block_size) * block_size
|
||||
|
||||
Loading…
Reference in New Issue
Block a user