From 4ca65a97638054ed04b37c2bf3e868d4c1209e9c Mon Sep 17 00:00:00 2001
From: Isotr0py <2037008807@qq.com>
Date: Mon, 2 Sep 2024 20:43:26 +0800
Subject: [PATCH] [Core][Bugfix] Accept GGUF model without .gguf extension
 (#8056)

---
 vllm/engine/arg_utils.py             |  3 ++-
 vllm/transformers_utils/config.py    |  5 +++--
 vllm/transformers_utils/tokenizer.py |  4 ++--
 vllm/transformers_utils/utils.py     | 16 ++++++++++++++++
 4 files changed, 23 insertions(+), 5 deletions(-)
 create mode 100644 vllm/transformers_utils/utils.py

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index d98f57bc..8dbe6504 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import FlexibleArgumentParser
 
 if TYPE_CHECKING:
@@ -753,7 +754,7 @@ class EngineArgs:
 
     def create_engine_config(self) -> EngineConfig:
         # gguf file needs a specific model loader and doesn't use hf_repo
-        if self.model.endswith(".gguf"):
+        if check_gguf_file(self.model):
             self.quantization = self.load_format = "gguf"
 
         # bitsandbytes quantization needs a specific model loader
diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index f3ac8d31..dfe83ddb 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -16,6 +16,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                              MedusaConfig, MLPSpeculatorConfig,
                                              MPTConfig, NemotronConfig,
                                              RWConfig, UltravoxConfig)
+from vllm.transformers_utils.utils import check_gguf_file
 
 if VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
@@ -56,7 +57,7 @@ def get_config(
 ) -> PretrainedConfig:
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
+    is_gguf = check_gguf_file(model)
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
     if VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
-    if Path(model).is_file() and Path(model).suffix == ".gguf":
+    if check_gguf_file(model):
         model = Path(model).parent
 
     return get_image_processor_config(model, revision=revision, **kwargs)
diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py
index 28669758..f9fb8d1e 100644
--- a/vllm/transformers_utils/tokenizer.py
+++ b/vllm/transformers_utils/tokenizer.py
@@ -12,6 +12,7 @@ from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
                                                 MistralTokenizer)
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import make_async
 
 logger = init_logger(__name__)
@@ -96,8 +97,7 @@ def get_tokenizer(
         kwargs["truncation_side"] = "left"
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(tokenizer_name).is_file() and Path(
-        tokenizer_name).suffix == ".gguf"
+    is_gguf = check_gguf_file(tokenizer_name)
     if is_gguf:
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py
new file mode 100644
index 00000000..7a9041b0
--- /dev/null
+++ b/vllm/transformers_utils/utils.py
@@ -0,0 +1,16 @@
+from os import PathLike
+from pathlib import Path
+from typing import Union
+
+
+def check_gguf_file(model: Union[str, PathLike]) -> bool:
+    """Check if the file is a GGUF model."""
+    model = Path(model)
+    if not model.is_file():
+        return False
+    elif model.suffix == ".gguf":
+        return True
+
+    with open(model, "rb") as f:
+        header = f.read(4)
+    return header == b"GGUF"
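
Usage sketch (not part of the patch): how the new check_gguf_file helper
classifies paths after this change. The temporary file names below are
hypothetical, and this assumes a vllm install that includes the patch.

# Exercise check_gguf_file against the three cases the helper handles:
# GGUF magic bytes without the extension, the .gguf suffix, and everything else.
import tempfile
from pathlib import Path

from vllm.transformers_utils.utils import check_gguf_file

with tempfile.TemporaryDirectory() as tmp:
    # Magic bytes are enough even without a .gguf extension (the #8056 fix).
    headerless = Path(tmp) / "qwen2-q4_k_m"
    headerless.write_bytes(b"GGUF" + b"\x00" * 8)
    assert check_gguf_file(headerless)

    # A .gguf suffix is accepted without inspecting the header.
    suffixed = Path(tmp) / "model.gguf"
    suffixed.write_bytes(b"")
    assert check_gguf_file(suffixed)

    # Other files, and paths that are not files at all, are rejected.
    other = Path(tmp) / "model.safetensors"
    other.write_bytes(b"not a gguf header")
    assert not check_gguf_file(other)
    assert not check_gguf_file(Path(tmp) / "missing")

Note the order of the checks in the helper: the cheap extension test runs
first, and the 4-byte header read is only a fallback for extension-less
files, so the common .gguf path never touches file contents.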