[Frontend] OpenAI API server: Do not add bos token by default when encoding (#4688)

This commit is contained in:
bofeng huang 2024-05-17 03:47:22 +02:00 committed by GitHub
parent 8e7fb5d43a
commit 0150a10630
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 22 additions and 12 deletions

View File

@@ -158,7 +158,7 @@ class OpenAIServingChat(OpenAIServing):
try: try:
# Tokenize/detokenize depending on prompt format (string/token list) # Tokenize/detokenize depending on prompt format (string/token list)
prompt_ids, prompt_text = self._validate_prompt_and_tokenize( prompt_ids, prompt_text = self._validate_prompt_and_tokenize(
request, prompt=prompt) request, prompt=prompt, add_special_tokens=False)
sampling_params = request.to_sampling_params() sampling_params = request.to_sampling_params()
lora_request = self._maybe_get_lora(request) lora_request = self._maybe_get_lora(request)
decoding_config = await self.engine.get_decoding_config() decoding_config = await self.engine.get_decoding_config()

View File

@@ -1,7 +1,7 @@
import json import json
from dataclasses import dataclass from dataclasses import dataclass
from http import HTTPStatus from http import HTTPStatus
from typing import Dict, List, Optional, Tuple, Union from typing import Any, Dict, List, Optional, Tuple, Union
from pydantic import Field from pydantic import Field
from typing_extensions import Annotated from typing_extensions import Annotated
@@ -165,13 +165,14 @@ class OpenAIServing:
raise ValueError(f"The model `{request.model}` does not exist.") raise ValueError(f"The model `{request.model}` does not exist.")
def _validate_prompt_and_tokenize( def _validate_prompt_and_tokenize(
self, self,
request: Union[ChatCompletionRequest, CompletionRequest, request: Union[ChatCompletionRequest, CompletionRequest,
EmbeddingRequest], EmbeddingRequest],
prompt: Optional[str] = None, prompt: Optional[str] = None,
prompt_ids: Optional[List[int]] = None, prompt_ids: Optional[List[int]] = None,
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None truncate_prompt_tokens: Optional[Annotated[int,
) -> Tuple[List[int], str]: Field(ge=1)]] = None,
add_special_tokens: bool = True) -> Tuple[List[int], str]:
if not (prompt or prompt_ids): if not (prompt or prompt_ids):
raise ValueError("Either prompt or prompt_ids should be provided.") raise ValueError("Either prompt or prompt_ids should be provided.")
if (prompt and prompt_ids): if (prompt and prompt_ids):
@@ -179,10 +179,19 @@ class OpenAIServing:
"Only one of prompt or prompt_ids should be provided.") "Only one of prompt or prompt_ids should be provided.")
if prompt_ids is None: if prompt_ids is None:
tokenizer_kwargs = {} if truncate_prompt_tokens is None else { # When using OpenAIServingChat for chat completions, the
"truncation": True, # special tokens (e.g., BOS) have already been added by the
"max_length": truncate_prompt_tokens, # chat template. Therefore, we do not need to add them again.
# Set add_special_tokens to False to avoid adding the BOS tokens
# again.
tokenizer_kwargs: Dict[str, Any] = {
"add_special_tokens": add_special_tokens
} }
if truncate_prompt_tokens is not None:
tokenizer_kwargs.update({
"truncation": True,
"max_length": truncate_prompt_tokens,
})
input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids input_ids = self.tokenizer(prompt, **tokenizer_kwargs).input_ids
elif truncate_prompt_tokens is not None: elif truncate_prompt_tokens is not None:
input_ids = prompt_ids[-truncate_prompt_tokens:] input_ids = prompt_ids[-truncate_prompt_tokens:]