[BugFix] Fix Granite model configuration (#8216)

2026-01-26 01:04:28 +08:00 · 2024-09-05 20:39:29 -07:00 · 2024-09-05 20:39:29 -07:00 · baa5467547
commit baa5467547
parent db3bf7c991
2 changed files with 42 additions and 24 deletions
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@ -10,12 +10,16 @@ from transformers.models.auto.modeling_auto import (

 from vllm.envs import VLLM_USE_MODELSCOPE
 from vllm.logger import init_logger
+# yapf conflicts with isort for this block
+# yapf: disable
 from vllm.transformers_utils.configs import (ChatGLMConfig, DbrxConfig,
                                             EAGLEConfig, ExaoneConfig,
-                                             InternVLChatConfig, JAISConfig,
-                                             MedusaConfig, MLPSpeculatorConfig,
-                                             MPTConfig, NemotronConfig,
-                                             RWConfig, UltravoxConfig)
+                                             GraniteConfig, InternVLChatConfig,
+                                             JAISConfig, MedusaConfig,
+                                             MLPSpeculatorConfig, MPTConfig,
+                                             NemotronConfig, RWConfig,
+                                             UltravoxConfig)
+# yapf: enable
 from vllm.transformers_utils.utils import check_gguf_file

 if VLLM_USE_MODELSCOPE:
@ -39,6 +43,9 @@ _CONFIG_REGISTRY: Dict[str, Type[PretrainedConfig]] = {
    "internvl_chat": InternVLChatConfig,
    "nemotron": NemotronConfig,
    "ultravox": UltravoxConfig,
+    # Granite can be removed from here once we have upgraded to
+    # transformers 4.45+
+    "granite": GraniteConfig,
 }

 for name, cls in _CONFIG_REGISTRY.items():
@ -62,29 +69,36 @@ def get_config(
        kwargs["gguf_file"] = Path(model).name
        model = Path(model).parent

-    try:
-        config = AutoConfig.from_pretrained(
-            model,
-            trust_remote_code=trust_remote_code,
-            revision=revision,
-            code_revision=code_revision,
-            **kwargs)
-    except ValueError as e:
-        if (not trust_remote_code and
-                "requires you to execute the configuration file" in str(e)):
-            err_msg = (
-                "Failed to load the model config. If the model is a custom "
-                "model not yet available in the HuggingFace transformers "
-                "library, consider setting `trust_remote_code=True` in LLM "
-                "or using the `--trust-remote-code` flag in the CLI.")
-            raise RuntimeError(err_msg) from e
-        else:
-            raise e
-    if config.model_type in _CONFIG_REGISTRY:
-        config_class = _CONFIG_REGISTRY[config.model_type]
+    config_dict, _ = PretrainedConfig.get_config_dict(
+        model, revision=revision, code_revision=code_revision, **kwargs)
+
+    # Use custom model class if it's in our registry
+    model_type = config_dict.get("model_type")
+    if model_type in _CONFIG_REGISTRY:
+        config_class = _CONFIG_REGISTRY[model_type]
        config = config_class.from_pretrained(model,
                                              revision=revision,
                                              code_revision=code_revision)
+    else:
+        try:
+            config = AutoConfig.from_pretrained(
+                model,
+                trust_remote_code=trust_remote_code,
+                revision=revision,
+                code_revision=code_revision,
+                **kwargs)
+        except ValueError as e:
+            if (not trust_remote_code
+                    and "requires you to execute the configuration file"
+                    in str(e)):
+                err_msg = (
+                    "Failed to load the model config. If the model is a custom "
+                    "model not yet available in the HuggingFace transformers "
+                    "library, consider setting `trust_remote_code=True` in LLM "
+                    "or using the `--trust-remote-code` flag in the CLI.")
+                raise RuntimeError(err_msg) from e
+            else:
+                raise e

    # Special architecture mapping check for GGUF models
    if is_gguf:
--- a/vllm/transformers_utils/configs/init.py
+++ b/vllm/transformers_utils/configs/init.py
@ -6,6 +6,7 @@ from vllm.transformers_utils.configs.exaone import ExaoneConfig
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
+from vllm.transformers_utils.configs.granite import GraniteConfig
 from vllm.transformers_utils.configs.internvl import InternVLChatConfig
 from vllm.transformers_utils.configs.jais import JAISConfig
 from vllm.transformers_utils.configs.medusa import MedusaConfig
@ -27,4 +28,7 @@ __all__ = [
    "MLPSpeculatorConfig",
    "NemotronConfig",
    "UltravoxConfig",
+    # Granite can be removed from here once we have upgraded to
+    # transformers 4.45+
+    "GraniteConfig",
 ]