[Bugfix] Better error message for MLPSpeculator when num_speculative_tokens is set too high (#5894)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
This commit is contained in:
parent
57f09a419c
commit
ec1ad0046c
@ -956,9 +956,9 @@ class SpeculativeConfig:
|
|||||||
# Verify provided value doesn't exceed the maximum
|
# Verify provided value doesn't exceed the maximum
|
||||||
# supported by the draft model.
|
# supported by the draft model.
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Expected both speculative_model and "
|
"This speculative model supports a maximum of "
|
||||||
"num_speculative_tokens to be provided, but found "
|
f"num_speculative_tokens={n_predict}, but "
|
||||||
f"{speculative_model=} and {num_speculative_tokens=}.")
|
f"{num_speculative_tokens=} was provided.")
|
||||||
|
|
||||||
draft_model_config.max_model_len = (
|
draft_model_config.max_model_len = (
|
||||||
SpeculativeConfig._maybe_override_draft_max_model_len(
|
SpeculativeConfig._maybe_override_draft_max_model_len(
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user