From f1f6cc10c77d6ee40b3ce769124cb2760428dc48 Mon Sep 17 00:00:00 2001
From: Federico Galatolo
Date: Wed, 24 Jan 2024 19:21:56 +0100
Subject: [PATCH] Added `include_stop_str_in_output` and `length_penalty`
 parameters to OpenAI API (#2562)

---
 vllm/entrypoints/openai/protocol.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index fff94366..fc15b783 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -78,6 +78,8 @@ class ChatCompletionRequest(BaseModel):
     echo: Optional[bool] = False
     repetition_penalty: Optional[float] = 1.0
     min_p: Optional[float] = 0.0
+    include_stop_str_in_output: Optional[bool] = False
+    length_penalty: Optional[float] = 1.0
 
     def to_sampling_params(self) -> SamplingParams:
         return SamplingParams(
@@ -97,6 +99,8 @@ class ChatCompletionRequest(BaseModel):
             use_beam_search=self.use_beam_search,
             skip_special_tokens=self.skip_special_tokens,
             spaces_between_special_tokens=self.spaces_between_special_tokens,
+            include_stop_str_in_output=self.include_stop_str_in_output,
+            length_penalty=self.length_penalty,
         )
 
@@ -127,6 +131,8 @@ class CompletionRequest(BaseModel):
     spaces_between_special_tokens: Optional[bool] = True
     repetition_penalty: Optional[float] = 1.0
     min_p: Optional[float] = 0.0
+    include_stop_str_in_output: Optional[bool] = False
+    length_penalty: Optional[float] = 1.0
 
     def to_sampling_params(self):
         echo_without_generation = self.echo and self.max_tokens == 0
@@ -150,6 +156,8 @@ class CompletionRequest(BaseModel):
             prompt_logprobs=self.logprobs if self.echo else None,
             skip_special_tokens=self.skip_special_tokens,
             spaces_between_special_tokens=(self.spaces_between_special_tokens),
+            include_stop_str_in_output=self.include_stop_str_in_output,
+            length_penalty=self.length_penalty,
         )
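
Not part of the patch itself: the sketch below shows how a client could exercise the two new request fields once this change is deployed. It assumes a vLLM OpenAI-compatible server listening on http://localhost:8000 and a placeholder model name; since vLLM's SamplingParams only consults length_penalty during beam search, the request also enables beam search (best_of > 1, temperature 0).

# Usage sketch (illustrative only, not part of the patch): assumes a vLLM
# OpenAI-compatible server at http://localhost:8000 and a placeholder model.
import requests

resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "model": "my-model",  # placeholder; use the model the server was launched with
        "prompt": "The capital of France is",
        "max_tokens": 16,
        "stop": ["."],
        # New field from this patch: keep the matched stop string in the output text.
        "include_stop_str_in_output": True,
        # New field from this patch: length_penalty only takes effect under beam
        # search in vLLM, so beam search is enabled here.
        "use_beam_search": True,
        "best_of": 4,
        "temperature": 0.0,
        "length_penalty": 0.8,
    },
    timeout=60,
)
print(resp.json()["choices"][0]["text"])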