diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py
index bff35774..058e3ef4 100644
--- a/vllm/core/scheduler.py
+++ b/vllm/core/scheduler.py
@@ -190,13 +190,13 @@ class Scheduler:
                 break

             num_prompt_tokens = seq_group.get_seqs()[0].get_len()
-            if num_prompt_tokens > min(
-                    self.scheduler_config.max_model_len,
-                    self.scheduler_config.max_num_batched_tokens):
+            prompt_limit = min(
+                self.scheduler_config.max_model_len,
+                self.scheduler_config.max_num_batched_tokens)
+            if num_prompt_tokens > prompt_limit:
                 logger.warning(
                     f"Input prompt ({num_prompt_tokens} tokens) is too long"
-                    " and exceeds limit of "
-                    f"{self.scheduler_config.max_model_len}")
+                    f" and exceeds limit of {prompt_limit}")
                 for seq in seq_group.get_seqs():
                     seq.status = SequenceStatus.FINISHED_IGNORED
                 ignored_seq_groups.append(seq_group)
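
For context, a minimal self-contained sketch of the behavior this patch changes. FakeSchedulerConfig and check_prompt below are hypothetical stand-ins, not vLLM's actual classes; only the field names max_model_len and max_num_batched_tokens are taken from the diff. The point is that the effective prompt limit is min() of the two settings, so the warning should report that minimum rather than always printing max_model_len.

# Sketch only: FakeSchedulerConfig / check_prompt are illustrative stand-ins
# for the scheduler config and the check in the diff above.
from dataclasses import dataclass


@dataclass
class FakeSchedulerConfig:
    max_model_len: int
    max_num_batched_tokens: int


def check_prompt(num_prompt_tokens: int, config: FakeSchedulerConfig) -> bool:
    """Return True if the prompt fits; otherwise warn with the actual limit."""
    # The binding limit is whichever of the two settings is smaller.
    prompt_limit = min(config.max_model_len,
                       config.max_num_batched_tokens)
    if num_prompt_tokens > prompt_limit:
        # Post-fix behavior: report the effective limit, not max_model_len.
        print(f"Input prompt ({num_prompt_tokens} tokens) is too long"
              f" and exceeds limit of {prompt_limit}")
        return False
    return True


# When max_num_batched_tokens (2560) is smaller than max_model_len (4096),
# a 3000-token prompt is rejected, and the warning now reports 2560;
# before this patch it would have misleadingly printed 4096.
config = FakeSchedulerConfig(max_model_len=4096, max_num_batched_tokens=2560)
assert not check_prompt(3000, config)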