From 351d5e7b8253d754b2a951152cd48927c4c1629d Mon Sep 17 00:00:00 2001
From: maor-ps <154728172+maor-ps@users.noreply.github.com>
Date: Tue, 11 Jun 2024 05:30:31 +0300
Subject: [PATCH] [Bugfix] OpenAI entrypoint limits logprobs while ignoring server defined --max-logprobs (#5312)

Co-authored-by: DarkLight1337
---
 tests/entrypoints/test_openai_server.py | 8 ++++++--
 vllm/config.py                          | 2 +-
 vllm/engine/arg_utils.py                | 2 +-
 vllm/entrypoints/openai/protocol.py     | 9 ++++-----
 4 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tests/entrypoints/test_openai_server.py b/tests/entrypoints/test_openai_server.py
index d0fe08ae..79a6c068 100644
--- a/tests/entrypoints/test_openai_server.py
+++ b/tests/entrypoints/test_openai_server.py
@@ -264,7 +264,9 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
             prompt=[0, 0, 0, 0, 0],
             max_tokens=5,
             temperature=0.0,
-            logprobs=6,
+            # vLLM has higher default max_logprobs (20 instead of 5) to support
+            # both Completion API and Chat Completion API
+            logprobs=21,
         )
     ...
     with pytest.raises(
@@ -274,7 +276,9 @@ async def test_too_many_completion_logprobs(server, client: openai.AsyncOpenAI,
             prompt=[0, 0, 0, 0, 0],
             max_tokens=5,
             temperature=0.0,
-            logprobs=6,
+            # vLLM has higher default max_logprobs (20 instead of 5) to support
+            # both Completion API and Chat Completion API
+            logprobs=30,
             stream=True,
         )
         async for chunk in stream:
diff --git a/vllm/config.py b/vllm/config.py
index fa296cd6..c07597b5 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -100,7 +100,7 @@ class ModelConfig:
         enforce_eager: bool = False,
         max_context_len_to_capture: Optional[int] = None,
         max_seq_len_to_capture: Optional[int] = None,
-        max_logprobs: int = 5,
+        max_logprobs: int = 20,
         disable_sliding_window: bool = False,
         skip_tokenizer_init: bool = False,
         served_model_name: Optional[Union[str, List[str]]] = None,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index b7e815db..e0aadece 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -48,7 +48,7 @@ class EngineArgs:
     gpu_memory_utilization: float = 0.90
     max_num_batched_tokens: Optional[int] = None
     max_num_seqs: int = 256
-    max_logprobs: int = 5  # OpenAI default value
+    max_logprobs: int = 20  # Default value for OpenAI Chat Completions API
     disable_log_stats: bool = False
     revision: Optional[str] = None
     code_revision: Optional[str] = None
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 9424ccc9..5419fa21 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -322,9 +322,9 @@ class ChatCompletionRequest(OpenAIBaseModel):
                 raise ValueError(
                     "when using `top_logprobs`, `logprobs` must be set to true."
                 )
-            elif not 0 <= data["top_logprobs"] <= 20:
+            elif data["top_logprobs"] < 0:
                 raise ValueError(
-                    "`top_logprobs` must be a value in the interval [0, 20].")
+                    "`top_logprobs` must be a positive value.")
         return data
@@ -478,9 +478,8 @@ class CompletionRequest(OpenAIBaseModel):
     @classmethod
     def check_logprobs(cls, data):
         if "logprobs" in data and data[
-                "logprobs"] is not None and not 0 <= data["logprobs"] <= 5:
-            raise ValueError(("if passed, `logprobs` must be a value",
-                              " in the interval [0, 5]."))
+                "logprobs"] is not None and not data["logprobs"] >= 0:
+            raise ValueError("if passed, `logprobs` must be a positive value.")
         return data

     @model_validator(mode="before")
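
Usage note (illustrative, not part of the patch): the sketch below shows how the raised default interacts with client requests, assuming a vLLM OpenAI-compatible server has been started locally (for example with python -m vllm.entrypoints.openai.api_server --model <model> --max-logprobs 20). The base URL, API key, and model name are placeholders.

# Illustrative sketch only: assumes a local vLLM OpenAI-compatible server;
# base URL, API key, and model name below are placeholders.
import openai

client = openai.OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

# Chat Completions: top_logprobs=20 now fits within the server's default
# max_logprobs of 20; the previous default of 5 would have rejected it.
chat = client.chat.completions.create(
    model="my-served-model",  # placeholder
    messages=[{"role": "user", "content": "Hello"}],
    logprobs=True,
    top_logprobs=20,
)

# Completions: values above the server's --max-logprobs still fail, but the
# limit is now the engine setting rather than a hard-coded interval [0, 5].
try:
    client.completions.create(
        model="my-served-model",  # placeholder
        prompt="Hello",
        logprobs=21,  # exceeds the default limit of 20
    )
except openai.BadRequestError as exc:
    print(exc)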