[Core][Bugfix] Accept GGUF model without .gguf extension (#8056)

commit 4ca65a9763
parent e2b2aa5a0f
vllm/engine/arg_utils.py

@@ -16,6 +16,7 @@ from vllm.config import (CacheConfig, DecodingConfig, DeviceConfig,
 from vllm.executor.executor_base import ExecutorBase
 from vllm.logger import init_logger
 from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import FlexibleArgumentParser
 
 if TYPE_CHECKING:
@@ -753,7 +754,7 @@ class EngineArgs:
 
     def create_engine_config(self) -> EngineConfig:
         # gguf file needs a specific model loader and doesn't use hf_repo
-        if self.model.endswith(".gguf"):
+        if check_gguf_file(self.model):
             self.quantization = self.load_format = "gguf"
 
         # bitsandbytes quantization needs a specific model loader
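With this change, create_engine_config() no longer keys on the ".gguf" suffix alone: check_gguf_file() also sniffs the file's magic bytes, so a renamed or extension-less GGUF checkpoint still selects the GGUF loader. A minimal sketch of the effect (the local path is hypothetical, and a real GGUF file must exist there for this to run):

    from vllm.engine.arg_utils import EngineArgs

    # Hypothetical path: a GGUF checkpoint saved without the .gguf suffix.
    args = EngineArgs(model="/models/llama-2-7b.Q4_K_M")
    engine_config = args.create_engine_config()

    # check_gguf_file() found the b"GGUF" magic, so both fields were
    # rewritten before the config was built:
    assert args.quantization == "gguf"
    assert args.load_format == "gguf"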
vllm/transformers_utils/config.py

@@ -16,6 +16,7 @@ from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                              MedusaConfig, MLPSpeculatorConfig,
                                              MPTConfig, NemotronConfig,
                                              RWConfig, UltravoxConfig)
+from vllm.transformers_utils.utils import check_gguf_file
 
 if VLLM_USE_MODELSCOPE:
     from modelscope import AutoConfig
@@ -56,7 +57,7 @@ def get_config(
 ) -> PretrainedConfig:
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(model).is_file() and Path(model).suffix == ".gguf"
+    is_gguf = check_gguf_file(model)
     if is_gguf:
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
@@ -112,7 +113,7 @@ def get_hf_image_processor_config(
     if VLLM_USE_MODELSCOPE:
         return dict()
     # Separate model folder from file path for GGUF models
-    if Path(model).is_file() and Path(model).suffix == ".gguf":
+    if check_gguf_file(model):
         model = Path(model).parent
     return get_image_processor_config(model, revision=revision, **kwargs)
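Both call sites in this file follow the same pattern: once a path is identified as GGUF, the file name is peeled off into the gguf_file kwarg and the parent directory is passed on as the model folder, which is the shape the from_pretrained() interfaces in transformers expect. A self-contained sketch of the split (the path is hypothetical):

    from pathlib import Path

    model = "/models/llama-2-7b.Q4_K_M"        # hypothetical GGUF file

    # Mirrors what get_config() does once is_gguf is True:
    kwargs = {"gguf_file": Path(model).name}   # "llama-2-7b.Q4_K_M"
    model = Path(model).parent                 # "/models"
    # AutoConfig.from_pretrained(model, **kwargs) then resolves the folder
    # together with the metadata embedded in the GGUF file.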
vllm/transformers_utils/tokenizer.py

@@ -12,6 +12,7 @@ from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
 from vllm.transformers_utils.tokenizers import (BaichuanTokenizer,
                                                 MistralTokenizer)
+from vllm.transformers_utils.utils import check_gguf_file
 from vllm.utils import make_async
 
 logger = init_logger(__name__)
@@ -96,8 +97,7 @@ def get_tokenizer(
         kwargs["truncation_side"] = "left"
 
     # Separate model folder from file path for GGUF models
-    is_gguf = Path(tokenizer_name).is_file() and Path(
-        tokenizer_name).suffix == ".gguf"
+    is_gguf = check_gguf_file(tokenizer_name)
     if is_gguf:
         kwargs["gguf_file"] = Path(tokenizer_name).name
         tokenizer_name = Path(tokenizer_name).parent
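The tokenizer path applies the same folder/file split before handing off to transformers, which can reconstruct a tokenizer from the metadata embedded in a GGUF checkpoint via its gguf_file kwarg. A hedged sketch, assuming a real GGUF file at the hypothetical path:

    from pathlib import Path
    from transformers import AutoTokenizer

    name = "/models/llama-2-7b.Q4_K_M"       # hypothetical, no .gguf suffix
    tokenizer = AutoTokenizer.from_pretrained(
        Path(name).parent,                   # model folder
        gguf_file=Path(name).name,           # tokenizer data read from GGUF
    )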
vllm/transformers_utils/utils.py (new file, 16 lines)

@@ -0,0 +1,16 @@
+from os import PathLike
+from pathlib import Path
+from typing import Union
+
+
+def check_gguf_file(model: Union[str, PathLike]) -> bool:
+    """Check if the file is a GGUF model."""
+    model = Path(model)
+    if not model.is_file():
+        return False
+    elif model.suffix == ".gguf":
+        return True
+
+    with open(model, "rb") as f:
+        header = f.read(4)
+    return header == b"GGUF"
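The helper short-circuits on the cheap checks (not a file, or an explicit .gguf suffix) and only then reads the first four bytes, which the GGUF format fixes as the ASCII magic b"GGUF". A self-contained sketch of the detection behavior (file names and contents are synthetic, not from this diff):

    import tempfile
    from pathlib import Path

    from vllm.transformers_utils.utils import check_gguf_file

    with tempfile.TemporaryDirectory() as tmp:
        # A GGUF file saved without its extension: caught by the magic bytes.
        unsuffixed = Path(tmp) / "llama-2-7b.Q4_K_M"
        unsuffixed.write_bytes(b"GGUF" + b"\x00" * 12)  # minimal fake header
        assert check_gguf_file(unsuffixed)

        # A non-GGUF file is rejected after the 4-byte read.
        other = Path(tmp) / "config.json"
        other.write_text("{}")
        assert not check_gguf_file(other)

        # Directories and missing paths fail the is_file() check up front.
        assert not check_gguf_file(tmp)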