From 67d96c29fba9b72cb4c4edbc26211c208a00ebdd Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Mon, 19 Jun 2023 23:19:47 -0700
Subject: [PATCH] Use slow tokenizer for open llama models (#168)

---
 vllm/engine/tokenizer_utils.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/vllm/engine/tokenizer_utils.py b/vllm/engine/tokenizer_utils.py
index 65b28251..1a0115e6 100644
--- a/vllm/engine/tokenizer_utils.py
+++ b/vllm/engine/tokenizer_utils.py
@@ -17,7 +17,12 @@ def get_tokenizer(
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
     """Gets a tokenizer for the given model name via Huggingface."""
     config = AutoConfig.from_pretrained(model_name)
-    if config.model_type == "llama" and getattr(kwargs, "use_fast", True):
+    if "open_llama" in model_name:
+        kwargs["use_fast"] = False
+        logger.info(
+            "OpenLLaMA models do not support the fast tokenizer. "
+            "Using the slow tokenizer instead.")
+    elif config.model_type == "llama" and getattr(kwargs, "use_fast", True):
         # LLaMA fast tokenizer causes protobuf errors in some environments.
         # However, we found that the below LLaMA fast tokenizer works well in
         # most environments.
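
Note (not part of the patch): the snippet below is a minimal, self-contained sketch of the tokenizer selection this change introduces, assuming only the Hugging Face transformers package. The function name get_tokenizer_sketch and the model name "openlm-research/open_llama_7b" are illustrative, and the sketch uses a plain dict lookup (kwargs.get) for the use_fast check, whereas the diff's elif calls getattr on the kwargs dict.

    from transformers import AutoConfig, AutoTokenizer

    def get_tokenizer_sketch(model_name: str, **kwargs):
        """Sketch of the tokenizer selection logic added by this patch."""
        config = AutoConfig.from_pretrained(model_name)
        if "open_llama" in model_name:
            # OpenLLaMA models do not support the fast tokenizer, so force
            # the slow (sentencepiece-based) tokenizer instead.
            kwargs["use_fast"] = False
        elif config.model_type == "llama" and kwargs.get("use_fast", True):
            # Other LLaMA models keep the fast-tokenizer handling shown in
            # the diff; the fast tokenizer can hit protobuf errors in some
            # environments.
            pass
        return AutoTokenizer.from_pretrained(model_name, **kwargs)

    # Illustrative call: an OpenLLaMA checkpoint ends up with the slow tokenizer.
    # tokenizer = get_tokenizer_sketch("openlm-research/open_llama_7b")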