[BugFix] Fix Granite model configuration (#8216)
parent db3bf7c991
commit baa5467547
vllm/transformers_utils/config.py

```diff
@@ -10,12 +10,16 @@ from transformers.models.auto.modeling_auto import (
 
 from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                              EAGLEConfig, ExaoneConfig,
-                                             InternVLChatConfig, JAISConfig,
-                                             MedusaConfig, MLPSpeculatorConfig,
-                                             MPTConfig, NemotronConfig,
-                                             RWConfig, UltravoxConfig)
+                                             GraniteConfig, InternVLChatConfig,
+                                             JAISConfig, MedusaConfig,
+                                             MLPSpeculatorConfig, MPTConfig,
+                                             NemotronConfig, RWConfig,
+                                             UltravoxConfig)
+# yapf: enable
 from vllm.transformers_utils.utils import check_gguf_file
 
 if VLLM_USE_MODELSCOPE:
```
```diff
@@ -39,6 +43,9 @@ _CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
     "internvl_chat": InternVLChatConfig,
     "nemotron": NemotronConfig,
     "ultravox": UltravoxConfig,
+    # Granite can be removed from here once we have upgraded to
+    # transformers 4.45+
+    "granite": GraniteConfig,
 }
 
 for name, cls in _CONFIG_REGISTRY.items():
```
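The comment on the new entry marks it as temporary: the registry exists so vLLM can resolve a `model_type` that the installed transformers release does not recognize yet, and the entry becomes dead weight once transformers ships native Granite support. A quick, illustrative way to check what the installed release already knows (this check is not part of the diff; `CONFIG_MAPPING` is transformers' own model_type table):

```python
import transformers
from transformers.models.auto.configuration_auto import CONFIG_MAPPING

# If transformers already maps "granite", the vendored registry entry
# (and the GraniteConfig backport) can be dropped, per the comment.
if "granite" in CONFIG_MAPPING:
    print(f"transformers {transformers.__version__}: native Granite support")
else:
    print(f"transformers {transformers.__version__}: vendored config needed")
```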
```diff
@@ -62,29 +69,36 @@ def get_config(
         kwargs["gguf_file"] = Path(model).name
         model = Path(model).parent
 
-    try:
-        config = AutoConfig.from_pretrained(
-            model,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            code_revision=code_revision,
-            **kwargs)
-    except ValueError as e:
-        if (not trust_remote_code and
-                "requires you to execute the configuration file" in str(e)):
-            err_msg = (
-                "Failed to load the model config. If the model is a custom "
-                "model not yet available in the HuggingFace transformers "
-                "library, consider setting `trust_remote_code=True` in LLM "
-                "or using the `--trust-remote-code` flag in the CLI.")
-            raise RuntimeError(err_msg) from e
-        else:
-            raise e
-    if config.model_type in _CONFIG_REGISTRY:
-        config_class = _CONFIG_REGISTRY[config.model_type]
+    config_dict, _ = PretrainedConfig.get_config_dict(
+        model, revision=revision, code_revision=code_revision, **kwargs)
+
+    # Use custom model class if it's in our registry
+    model_type = config_dict.get("model_type")
+    if model_type in _CONFIG_REGISTRY:
+        config_class = _CONFIG_REGISTRY[model_type]
         config = config_class.from_pretrained(model,
                                               revision=revision,
                                               code_revision=code_revision)
+    else:
+        try:
+            config = AutoConfig.from_pretrained(
+                model,
+                trust_remote_code=trust_remote_code,
+                revision=revision,
+                code_revision=code_revision,
+                **kwargs)
+        except ValueError as e:
+            if (not trust_remote_code
+                    and "requires you to execute the configuration file"
+                    in str(e)):
+                err_msg = (
+                    "Failed to load the model config. If the model is a custom "
+                    "model not yet available in the HuggingFace transformers "
+                    "library, consider setting `trust_remote_code=True` in LLM "
+                    "or using the `--trust-remote-code` flag in the CLI.")
+                raise RuntimeError(err_msg) from e
+            else:
+                raise e
 
     # Special architecture mapping check for GGUF models
     if is_gguf:
```
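The substantive fix is the reordering. Previously `AutoConfig.from_pretrained` ran first, and on transformers releases without Granite support it raises `ValueError` for the unrecognized `model_type` before the registry lookup is ever reached. Fetching the raw config dict first avoids this, because `PretrainedConfig.get_config_dict` parses `config.json` without validating `model_type`. A minimal sketch of the resulting flow, with a hypothetical checkpoint path and an abridged one-entry registry:

```python
from transformers import AutoConfig, PretrainedConfig

from vllm.transformers_utils.configs import GraniteConfig

MODEL = "ibm/granite-example"  # hypothetical path, for illustration
_CONFIG_REGISTRY = {"granite": GraniteConfig}  # abridged registry

# Step 1: read config.json without instantiating any config class.
# This succeeds even if transformers has never heard of "granite".
config_dict, _ = PretrainedConfig.get_config_dict(MODEL)
model_type = config_dict.get("model_type")

if model_type in _CONFIG_REGISTRY:
    # Step 2a: vendored class, so AutoConfig (and its ValueError on
    # unknown model types) is never involved.
    config = _CONFIG_REGISTRY[model_type].from_pretrained(MODEL)
else:
    # Step 2b: everything else still resolves through AutoConfig.
    config = AutoConfig.from_pretrained(MODEL)
```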
vllm/transformers_utils/configs/__init__.py

```diff
@@ -6,6 +6,7 @@ from vllm.transformers_utils.configs.exaone import ExaoneConfig
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
+from vllm.transformers_utils.configs.granite import GraniteConfig
 from vllm.transformers_utils.configs.internvl import InternVLChatConfig
 from vllm.transformers_utils.configs.jais import JAISConfig
 from vllm.transformers_utils.configs.medusa import MedusaConfig
```
```diff
@@ -27,4 +28,7 @@ __all__ = [
     "MLPSpeculatorConfig",
     "NemotronConfig",
     "UltravoxConfig",
+    # Granite can be removed from here once we have upgraded to
+    # transformers 4.45+
+    "GraniteConfig",
 ]
```
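With both the module import and the `__all__` entry in place, the backported class resolves from the package root. A quick smoke test with default hyperparameters (real checkpoints would go through `get_config`/`from_pretrained`):

```python
from vllm.transformers_utils.configs import GraniteConfig

config = GraniteConfig()  # defaults only
assert config.model_type == "granite"
```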