[Core][Bugfix] Accept GGUF model without .gguf extension (#8056)
This commit is contained in:
parent
e2b2aa5a0f
commit
4ca65a9763
@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
|
||||
from vllm.executor.executor_base import ExecutorBase
|
||||
from vllm.logger import init_logger
|
||||
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
|
||||
from vllm.transformers_utils.utils import check_gguf_file
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -753,7 +754,7 @@ class EngineArgs:
|
||||
|
||||
def create_engine_config(self) -> EngineConfig:
|
||||
# gguf file needs a specific model loader and doesn't use hf_repo
|
||||
if self.model.endswith(".gguf"):
|
||||
if check_gguf_file(self.model):
|
||||
self.quantization = self.load_format = "gguf"
|
||||
|
||||
# bitsandbytes quantization needs a specific model loader
|
||||
|
||||
@ -16,6 +16,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
|
||||
MedusaConfig, MLPSpeculatorConfig,
|
||||
MPTConfig, NemotronConfig,
|
||||
RWConfig, UltravoxConfig)
|
||||
from vllm.transformers_utils.utils import check_gguf_file
|
||||
|
||||
if VLLM_USE_MODELSCOPE:
|
||||
from modelscope import AutoConfig
|
||||
@ -56,7 +57,7 @@ def get_config(
|
||||
) -> PretrainedConfig:
|
||||
|
||||
# Separate model folder from file path for GGUF models
|
||||
is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
|
||||
is_gguf = check_gguf_file(model)
|
||||
if is_gguf:
|
||||
kwargs["gguf_file"] = Path(model).name
|
||||
model = Path(model).parent
|
||||
@ -112,7 +113,7 @@ def get_hf_image_processor_config(
|
||||
if VLLM_USE_MODELSCOPE:
|
||||
return dict()
|
||||
# Separate model folder from file path for GGUF models
|
||||
if Path(model).is_file() and Path(model).suffix == ".gguf":
|
||||
if check_gguf_file(model):
|
||||
model = Path(model).parent
|
||||
return get_image_processor_config(model, revision=revision, **kwargs)
|
||||
|
||||
|
||||
@ -12,6 +12,7 @@ from vllm.logger import init_logger
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
|
||||
MistralTokenizer)
|
||||
from vllm.transformers_utils.utils import check_gguf_file
|
||||
from vllm.utils import make_async
|
||||
|
||||
logger = init_logger(__name__)
|
||||
@ -96,8 +97,7 @@ def get_tokenizer(
|
||||
kwargs["truncation_side"] = "left"
|
||||
|
||||
# Separate model folder from file path for GGUF models
|
||||
is_gguf = Path(tokenizer_name).is_file() and Path(
|
||||
tokenizer_name).suffix == ".gguf"
|
||||
is_gguf = check_gguf_file(tokenizer_name)
|
||||
if is_gguf:
|
||||
kwargs["gguf_file"] = Path(tokenizer_name).name
|
||||
tokenizer_name = Path(tokenizer_name).parent
|
||||
|
||||
16
vllm/transformers_utils/utils.py
Normal file
16
vllm/transformers_utils/utils.py
Normal file
@ -0,0 +1,16 @@
|
||||
from os import PathLike
|
||||
from pathlib import Path
|
||||
from typing import Union
|
||||
|
||||
|
||||
def check_gguf_file(model: Union[str, PathLike]) -> bool:
    """Check if the file is a GGUF model."""
    path = Path(model)
    # Anything that is not an existing regular file cannot be a GGUF model.
    if not path.is_file():
        return False
    # Trust the conventional extension without opening the file.
    if path.suffix == ".gguf":
        return True
    # No extension hint: sniff the 4-byte GGUF magic number at the start
    # of the file (this is what lets extension-less models be accepted).
    with path.open("rb") as stream:
        magic = stream.read(4)
    return magic == b"GGUF"
|
||||
Loading…
Reference in New Issue
Block a user