diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 717232c4..a67d2f83 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -8,9 +8,6 @@ from vllm.transformers_utils.tokenizers import *
 
 logger = init_logger(__name__)
 
-# A fast LLaMA tokenizer with the pre-processed `tokenizer.json` file.
-_FAST_LLAMA_TOKENIZER = "hf-internal-testing/llama-tokenizer"
-
 
 def get_tokenizer(
     tokenizer_name: str,
@@ -27,13 +24,6 @@ def get_tokenizer(
                 "Cannot use the fast tokenizer in slow tokenizer mode.")
         kwargs["use_fast"] = False
 
-    if ("llama" in tokenizer_name.lower() and kwargs.get("use_fast", True)
-            and tokenizer_name != _FAST_LLAMA_TOKENIZER):
-        logger.info(
-            "For some LLaMA V1 models, initializing the fast tokenizer may "
-            "take a long time. To reduce the initialization time, consider "
-            f"using '{_FAST_LLAMA_TOKENIZER}' instead of the original "
-            "tokenizer.")
     try:
         tokenizer = AutoTokenizer.from_pretrained(
             tokenizer_name,
@@ -41,13 +31,6 @@ def get_tokenizer(
             trust_remote_code=trust_remote_code,
             tokenizer_revision=tokenizer_revision,
             **kwargs)
-    except TypeError as e:
-        # The LLaMA tokenizer causes a protobuf error in some environments.
-        err_msg = (
-            "Failed to load the tokenizer. If you are using a LLaMA V1 model "
-            f"consider using '{_FAST_LLAMA_TOKENIZER}' instead of the "
-            "original tokenizer.")
-        raise RuntimeError(err_msg) from e
     except ValueError as e:
         # If the error pertains to the tokenizer class not existing or not
         # currently being imported, suggest using the --trust-remote-code flag.
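
Not part of the patch: for reviewers, a minimal standalone sketch of the load path this diff leaves behind. It mirrors the control flow that remains in get_tokenizer (slow-mode check, then a direct AutoTokenizer.from_pretrained call); the wrapper name load_tokenizer and the parameter defaults are illustrative assumptions, not the vLLM API.

# Minimal sketch (not part of the diff) of the simplified load path:
# the _FAST_LLAMA_TOKENIZER hint and the TypeError fallback are gone,
# so any loading error now surfaces directly from transformers.
# `load_tokenizer` and its defaults are assumptions for illustration.
from transformers import AutoTokenizer

def load_tokenizer(tokenizer_name: str,
                   tokenizer_mode: str = "auto",
                   trust_remote_code: bool = False,
                   **kwargs):
    if tokenizer_mode == "slow":
        # Reject a contradictory request before forcing the slow tokenizer.
        if kwargs.get("use_fast", False):
            raise ValueError(
                "Cannot use the fast tokenizer in slow tokenizer mode.")
        kwargs["use_fast"] = False
    # Single code path for all models, LLaMA included.
    return AutoTokenizer.from_pretrained(
        tokenizer_name,
        trust_remote_code=trust_remote_code,
        **kwargs)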