[Core][Bugfix] Accept GGUF model without .gguf extension (#8056)

Isotr0py 2024-09-02 20:43:26 +08:00 committed by GitHub
parent e2b2aa5a0f
commit 4ca65a9763
4 changed files with 23 additions and 5 deletions
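
In short: GGUF detection no longer depends on the file name alone. The new check_gguf_file() helper (added in the last file below) still accepts the ".gguf" suffix, but when the suffix is missing it falls back to reading the file's first four bytes, which are the ASCII magic "GGUF" in every GGUF file. A minimal standalone sketch of the idea, using a hypothetical local path:

    from pathlib import Path

    # Hypothetical file: GGUF content saved without the ".gguf" suffix.
    model = Path("/models/llama-3-8b-q4_k_m")

    # Old check (suffix only): misses the file, so the GGUF-specific loader was skipped.
    print(model.suffix == ".gguf")          # False

    # New check: also accept a file whose first four bytes are the GGUF magic.
    with model.open("rb") as f:
        print(f.read(4) == b"GGUF")         # True for a real GGUF file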

View File

@@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import FlexibleArgumentParser

 if TYPE_CHECKING:
@@ -753,7 +754,7 @@ class EngineArgs:
     def create_engine_config(self) -> EngineConfig:
         # gguf file needs a specific model loader and doesn't use hf_repo
-        if self.model.endswith(".gguf"):
+        if check_gguf_file(self.model):
             self.quantization = self.load_format = "gguf"

         # bitsandbytes quantization needs a specific model loader
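
A hedged illustration of what this hunk changes for engine construction (the model path is hypothetical): a GGUF file saved without the ".gguf" extension is now routed to the GGUF loader, because create_engine_config() calls check_gguf_file() instead of testing the suffix.

    from vllm.engine.arg_utils import EngineArgs

    # Hypothetical local GGUF file without the ".gguf" suffix.
    args = EngineArgs(model="/models/llama-3-8b-q4_k_m")

    # Inside args.create_engine_config(), check_gguf_file(args.model) returns True,
    # so args.quantization and args.load_format are both set to "gguf".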

View File

@@ -16,6 +16,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                              MedusaConfig, MLPSpeculatorConfig,
                                              MPTConfig, NemotronConfig,
                                              RWConfig, UltravoxConfig)
+from vllm.transformers_utils.utils import check_gguf_file

 if VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
@@ -56,7 +57,7 @@ def get_config(
 ) -> PretrainedConfig:
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
+    is_gguf = check_gguf_file(model)
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
     if VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
-    if Path(model).is_file() and Path(model).suffix == ".gguf":
+    if check_gguf_file(model):
         model = Path(model).parent

     return get_image_processor_config(model, revision=revision, **kwargs)
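
For reference, a small sketch of the folder/file split both hunks in this file rely on (the path is hypothetical): the GGUF file name is kept so get_config() can pass it on as kwargs["gguf_file"], while the model path itself is rewritten to the containing directory.

    from pathlib import Path

    model = "/models/llama-3-8b-q4_k_m"   # hypothetical GGUF file, no ".gguf" suffix

    gguf_file = Path(model).name          # "llama-3-8b-q4_k_m"
    model_dir = Path(model).parent        # PosixPath("/models")
    # get_config() then loads the Hugging Face config from model_dir with
    # gguf_file forwarded through kwargs, as in the first hunk above.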

View File

@@ -12,6 +12,7 @@ from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
                                                 MistralTokenizer)
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import make_async

 logger = init_logger(__name__)
@@ -96,8 +97,7 @@ def get_tokenizer(
         kwargs["truncation_side"] = "left"

     # Separate model folder from file path for GGUF models
-    is_gguf = Path(tokenizer_name).is_file() and Path(
-        tokenizer_name).suffix == ".gguf"
+    is_gguf = check_gguf_file(tokenizer_name)
     if is_gguf:
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
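
The kwargs assembled above are eventually forwarded to the Hugging Face tokenizer loader, so the effect is roughly the call below (hypothetical path; the gguf_file argument requires a transformers release with GGUF support):

    from pathlib import Path
    from transformers import AutoTokenizer

    tokenizer_name = "/models/llama-3-8b-q4_k_m"   # hypothetical extensionless GGUF file

    tokenizer = AutoTokenizer.from_pretrained(
        Path(tokenizer_name).parent,
        gguf_file=Path(tokenizer_name).name,
    )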

View File

@@ -0,0 +1,16 @@
+from os import PathLike
+from pathlib import Path
+from typing import Union
+
+
+def check_gguf_file(model: Union[str, PathLike]) -> bool:
+    """Check if the file is a GGUF model."""
+    model = Path(model)
+    if not model.is_file():
+        return False
+    elif model.suffix == ".gguf":
+        return True
+
+    with open(model, "rb") as f:
+        header = f.read(4)
+    return header == b"GGUF"
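
A quick usage sketch of the new helper (temporary files with made-up names, assuming vLLM is installed): the ".gguf" suffix short-circuits the check, and otherwise the first four bytes decide.

    import tempfile
    from pathlib import Path

    from vllm.transformers_utils.utils import check_gguf_file

    with tempfile.TemporaryDirectory() as tmp:
        renamed = Path(tmp) / "model.bin"           # GGUF magic, wrong suffix
        renamed.write_bytes(b"GGUF" + b"\x00" * 8)
        other = Path(tmp) / "weights.safetensors"   # neither suffix nor magic
        other.write_bytes(b"\x00" * 12)

        print(check_gguf_file(renamed))             # True
        print(check_gguf_file(other))               # False
        print(check_gguf_file(tmp))                 # False: a directory, not a file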