[Bugfix] Better error message for MLPSpeculator when num_speculative_tokens is set too high (#5894)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
2024-06-28 16:42:17 +02:00 · 2024-06-28 16:42:17 +02:00 · ec1ad0046c
commit ec1ad0046c
parent 57f09a419c
1 changed file with 3 additions and 3 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -956,9 +956,9 @@ class SpeculativeConfig:
                    # Verify provided value doesn't exceed the maximum
                    # supported by the draft model.
                    raise ValueError(
-                        "Expected both speculative_model and "
+                        "This speculative model supports a maximum of "
-                        "num_speculative_tokens to be provided, but found "
+                        f"num_speculative_tokens={n_predict}, but "
-                        f"{speculative_model=} and {num_speculative_tokens=}.")
+                        f"{num_speculative_tokens=} was provided.")
            draft_model_config.max_model_len = (
                SpeculativeConfig._maybe_override_draft_max_model_len(