diff --git a/vllm/config.py b/vllm/config.py
index bb84fdde..ae089a6b 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -224,7 +224,7 @@ class SchedulerConfig:
             a single iteration.
         max_num_seqs: Maximum number of sequences to be processed in a single
             iteration.
-        max_seq_len: Maximum length of a sequence (including prompt
+        max_model_len: Maximum length of a sequence (including prompt
             and generated text).
     """
 
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index ce79cc8d..ea4ad264 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -353,7 +353,7 @@ class LLMEngine:
                 if stopped:
                     continue
 
-                # Check if the sequence has reached max_seq_len.
+                # Check if the sequence has reached max_model_len.
                 if seq.get_len() > self.scheduler_config.max_model_len:
                     self.scheduler.free_seq(
                         seq, SequenceStatus.FINISHED_LENGTH_CAPPED)