From 4ca65a97638054ed04b37c2bf3e868d4c1209e9c Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Mon, 2 Sep 2024 20:43:26 +0800
Subject: [PATCH] [Core][Bugfix] Accept GGUF model without .gguf extension
 (#8056)

---
 vllm/engine/arg_utils.py             |  3 ++-
 vllm/transformers_utils/config.py    |  5 +++--
 vllm/transformers_utils/tokenizer.py |  4 ++--
 vllm/transformers_utils/utils.py     | 16 ++++++++++++++++
 4 files changed, 23 insertions(+), 5 deletions(-)
 create mode 100644 vllm/transformers_utils/utils.py

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d98f57bc..8dbe6504 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import FlexibleArgumentParser
 
 if TYPE_CHECKING:
@@ -753,7 +754,7 @@ class EngineArgs:
 
     def create_engine_config(self) -> EngineConfig:
         # gguf file needs a specific model loader and doesn't use hf_repo
-        if self.model.endswith(".gguf"):
+        if check_gguf_file(self.model):
             self.quantization = self.load_format = "gguf"
 
         # bitsandbytes quantization needs a specific model loader
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index f3ac8d31..dfe83ddb 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -16,6 +16,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                              MedusaConfig, MLPSpeculatorConfig,
                                              MPTConfig, NemotronConfig,
                                              RWConfig, UltravoxConfig)
+from vllm.transformers_utils.utils import check_gguf_file
 
 if VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
@@ -56,7 +57,7 @@ def get_config(
 ) -> PretrainedConfig:
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
+    is_gguf = check_gguf_file(model)
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
     if VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
-    if Path(model).is_file() and Path(model).suffix == ".gguf":
+    if check_gguf_file(model):
         model = Path(model).parent
 
     return get_image_processor_config(model, revision=revision, **kwargs)
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 28669758..f9fb8d1e 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -12,6 +12,7 @@ from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
                                                 MistralTokenizer)
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import make_async
 
 logger = init_logger(__name__)
@@ -96,8 +97,7 @@ def get_tokenizer(
         kwargs["truncation_side"] = "left"
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(tokenizer_name).is_file() and Path(
-        tokenizer_name).suffix == ".gguf"
+    is_gguf = check_gguf_file(tokenizer_name)
     if is_gguf:
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
new file mode 100644
index 00000000..7a9041b0
--- /dev/null
+++ b/vllm/transformers_utils/utils.py
@@ -0,0 +1,16 @@
+from os import PathLike
+from pathlib import Path
+from typing import Union
+
+
+def check_gguf_file(model: Union[str, PathLike]) -> bool:
+    """Check if the file is a GGUF model."""
+    model = Path(model)
+    if not model.is_file():
+        return False
+    elif model.suffix == ".gguf":
+        return True
+
+    with open(model, "rb") as f:
+        header = f.read(4)
+    return header == b"GGUF"
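
Usage sketch (not part of the patch): how the new check_gguf_file helper
classifies paths after this change. The temporary file names below are
hypothetical, and this assumes a vllm install that includes the patch.

# Exercise check_gguf_file against the three cases the helper handles:
# GGUF magic bytes without the extension, the .gguf suffix, and everything else.
import tempfile
from pathlib import Path

from vllm.transformers_utils.utils import check_gguf_file

with tempfile.TemporaryDirectory() as tmp:
    # Magic bytes are enough even without a .gguf extension (the #8056 fix).
    headerless = Path(tmp) / "qwen2-q4_k_m"
    headerless.write_bytes(b"GGUF" + b"\x00" * 8)
    assert check_gguf_file(headerless)

    # A .gguf suffix is accepted without inspecting the header.
    suffixed = Path(tmp) / "model.gguf"
    suffixed.write_bytes(b"")
    assert check_gguf_file(suffixed)

    # Other files, and paths that are not files at all, are rejected.
    other = Path(tmp) / "model.safetensors"
    other.write_bytes(b"not a gguf header")
    assert not check_gguf_file(other)
    assert not check_gguf_file(Path(tmp) / "missing")

Note the order of the checks in the helper: the cheap extension test runs
first, and the 4-byte header read is only a fallback for extension-less
files, so the common .gguf path never touches file contents.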