[Frontend] OpenAI API server: Do not add BOS token by default when encoding (#4688)
This commit is contained in:
parent
8e7fb5d43a
commit
0150a10630
@ -158,7 +158,7 @@ class OpenAIServingChat(OpenAIServing):
|
|||||||
try:
|
try:
|
||||||
# Tokenize/detokenize depending on prompt format (string/token list)
|
# Tokenize/detokenize depending on prompt format (string/token list)
|
||||||
prompt_ids, prompt_text = self._validate_prompt_and_tokenize(
|
prompt_ids, prompt_text = self._validate_prompt_and_tokenize(
|
||||||
request, prompt=prompt)
|
request, prompt=prompt, add_special_tokens=False)
|
||||||
sampling_params = request.to_sampling_params()
|
sampling_params = request.to_sampling_params()
|
||||||
lora_request = self._maybe_get_lora(request)
|
lora_request = self._maybe_get_lora(request)
|
||||||
decoding_config = await self.engine.get_decoding_config()
|
decoding_config = await self.engine.get_decoding_config()
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from http import HTTPStatus
|
from http import HTTPStatus
|
||||||
from typing import Dict, List, Optional, Tuple, Union
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
@ -165,13 +165,14 @@ class OpenAIServing:
|
|||||||
raise ValueError(f"The model `{request.model}` does not exist.")
|
raise ValueError(f"The model `{request.model}` does not exist.")
|
||||||
|
|
||||||
def _validate_prompt_and_tokenize(
|
def _validate_prompt_and_tokenize(
|
||||||
self,
|
self,
|
||||||
request: Union[ChatCompletionRequest, CompletionRequest,
|
request: Union[ChatCompletionRequest, CompletionRequest,
|
||||||
EmbeddingRequest],
|
EmbeddingRequest],
|
||||||
prompt: Optional[str] = None,
|
prompt: Optional[str] = None,
|
||||||
prompt_ids: Optional[List[int]] = None,
|
prompt_ids: Optional[List[int]] = None,
|
||||||
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
|
truncate_prompt_tokens: Optional[Annotated[int,
|
||||||
) -> Tuple[List[int], str]:
|
Field(ge=1)]] = None,
|
||||||
|
add_special_tokens: bool = True) -> Tuple[List[int], str]:
|
||||||
if not (prompt or prompt_ids):
|
if not (prompt or prompt_ids):
|
||||||
raise ValueError("Either prompt or prompt_ids should be provided.")
|
raise ValueError("Either prompt or prompt_ids should be provided.")
|
||||||
if (prompt and prompt_ids):
|
if (prompt and prompt_ids):
|
||||||
@ -179,10 +180,19 @@ class OpenAIServing:
|
|||||||
"Only one of prompt or prompt_ids should be provided.")
|
"Only one of prompt or prompt_ids should be provided.")
|
||||||
|
|
||||||
if prompt_ids is None:
|
if prompt_ids is None:
|
||||||
tokenizer_kwargs = {} if truncate_prompt_tokens is None else {
|
# When using OpenAIServingChat for chat completions, the
|
||||||
"truncation": True,
|
# special tokens (e.g., BOS) have already been added by the
|
||||||
"max_length": truncate_prompt_tokens,
|
# chat template. Therefore, we do not need to add them again.
|
||||||
|
# Set add_special_tokens to False to avoid adding the BOS tokens
|
||||||
|
# again.
|
||||||
|
tokenizer_kwargs: Dict[str, Any] = {
|
||||||
|
"add_special_tokens": add_special_tokens
|
||||||
}
|
}
|
||||||
|
if truncate_prompt_tokens is not None:
|
||||||
|
tokenizer_kwargs.update({
|
||||||
|
"truncation": True,
|
||||||
|
"max_length": truncate_prompt_tokens,
|
||||||
|
})
|
||||||
input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids
|
input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids
|
||||||
elif truncate_prompt_tokens is not None:
|
elif truncate_prompt_tokens is not None:
|
||||||
input_ids = prompt_ids[-truncate_prompt_tokens:]
|
input_ids = prompt_ids[-truncate_prompt_tokens:]
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user