[Core][Bugfix] Accept GGUF model without .gguf extension (#8056)
This commit is contained in:
parent
e2b2aa5a0f
commit
4ca65a9763
@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
|
||||
from vllm.executor.executor_base import ExecutorBase
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
||||
from vllm.transformers_utils.utils import check_gguf_file
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -753,7 +754,7 @@ class EngineArgs:
|
||||
|
||||
def create_engine_config(self) -> EngineConfig:
|
||||
# gguf file needs a specific model loader and doesn't use hf_repo
|
||||
if self.model.endswith(".gguf"):
|
||||
if check_gguf_file(self.model):
|
||||
self.quantization = self.load_format = "gguf"
|
||||
|
||||
# bitsandbytes quantization needs a specific model loader
|
||||
|
||||
@ -16,6 +16,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
|
||||
MedusaConfig, MLPSpeculatorConfig,
|
||||
MPTConfig, NemotronConfig,
|
||||
RWConfig, UltravoxConfig)
|
||||
from vllm.transformers_utils.utils import check_gguf_file
|
||||
|
||||
if VLLM_USE_MODELSCOPE:
|
||||
from modelscope import AutoConfig
|
||||
@ -56,7 +57,7 @@ def get_config(
|
||||
) -> PretrainedConfig:
|
||||
|
||||
# Separate model folder from file path for GGUF models
|
||||
is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
|
||||
is_gguf = check_gguf_file(model)
|
||||
if is_gguf:
|
||||
kwargs["gguf_file"] = Path(model).name
|
||||
model = Path(model).parent
|
||||
@ -112,7 +113,7 @@ def get_hf_image_processor_config(
|
||||
if VLLM_USE_MODELSCOPE:
|
||||
return dict()
|
||||
# Separate model folder from file path for GGUF models
|
||||
if Path(model).is_file() and Path(model).suffix == ".gguf":
|
||||
if check_gguf_file(model):
|
||||
model = Path(model).parent
|
||||
return get_image_processor_config(model, revision=revision, **kwargs)
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ from vllm.logger import init_logger
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
|
||||
MistralTokenizer)
|
||||
from vllm.transformers_utils.utils import check_gguf_file
|
||||
from vllm.utils import make_async
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@ -96,8 +97,7 @@ def get_tokenizer(
|
||||
kwargs["truncation_side"] = "left"
|
||||
|
||||
# Separate model folder from file path for GGUF models
|
||||
is_gguf = Path(tokenizer_name).is_file() and Path(
|
||||
tokenizer_name).suffix == ".gguf"
|
||||
is_gguf = check_gguf_file(tokenizer_name)
|
||||
if is_gguf:
|
||||
kwargs["gguf_file"] = Path(tokenizer_name).name
|
||||
tokenizer_name = Path(tokenizer_name).parent
|
||||
|
||||
16
vllm/transformers_utils/utils.py
Normal file
16
vllm/transformers_utils/utils.py
Normal file
@ -0,0 +1,16 @@
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
|
||||
def check_gguf_file(model: Union[str, PathLike]) -> bool:
    """Check if the file is a GGUF model."""
    path = Path(model)
    # Anything that is not an existing regular file cannot be a GGUF model.
    if not path.is_file():
        return False
    # Trust the conventional extension without opening the file.
    if path.suffix == ".gguf":
        return True
    # No extension hint: sniff the 4-byte GGUF magic number at the start
    # of the file (this is what lets extension-less models be accepted).
    with path.open("rb") as stream:
        magic = stream.read(4)
    return magic == b"GGUF"
|
||||
Loading…
Reference in New Issue
Block a user