[Misc] Clean up InternVL family config registration (#19992)

Signed-off-by: Isotr0py <2037008807@qq.com>
2026-06-06 06:09:08 +08:00 · 2025-07-04 04:01:47 +08:00 · 2025-07-04 04:01:47 +08:00 · 71d6de3a26
commit 71d6de3a26
parent 536fd33003
5 changed files with 40 additions and 82 deletions
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@ -33,10 +33,8 @@ from vllm.logger import init_logger
 from vllm.transformers_utils.configs import (ChatGLMConfig, Cohere2Config,
                                             DbrxConfig, DeepseekVLV2Config,
                                             EAGLEConfig, ExaoneConfig,
-                                             H2OVLChatConfig,
-                                             InternVLChatConfig, JAISConfig,
-                                             KimiVLConfig, MedusaConfig,
-                                             MiniMaxText01Config,
+                                             JAISConfig, KimiVLConfig,
+                                             MedusaConfig, MiniMaxText01Config,
                                             MiniMaxVL01Config, MllamaConfig,
                                             MLPSpeculatorConfig, MPTConfig,
                                             NemotronConfig, NVLM_D_Config,
@ -90,8 +88,6 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
    "medusa": MedusaConfig,
    "eagle": EAGLEConfig,
    "exaone": ExaoneConfig,
    "h2ovl_chat": H2OVLChatConfig,
    "internvl_chat": InternVLChatConfig,
    "minimax_text_01": MiniMaxText01Config,
    "minimax_vl_01": MiniMaxVL01Config,
    "nemotron": NemotronConfig,
@ -104,6 +100,10 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = {
    **_CONFIG_REGISTRY_OVERRIDE_HF
 }
 # Maps a legacy config attribute name (key) to the attribute name it should
 # be exposed under instead (value). Consumed by
 # `_maybe_remap_hf_config_attrs`, which iterates over these pairs.
 _CONFIG_ATTRS_MAPPING: dict[str, str] = {
    "llm_config": "text_config",
 }
 class ConfigFormat(str, enum.Enum):
    AUTO = "auto"
@ -286,6 +286,18 @@ def is_encoder_decoder(config: PretrainedConfig) -> bool:
    return getattr(config, "is_encoder_decoder", False)
 def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig:
    """Rename legacy config attributes to the names vLLM expects.

    For every ``old -> new`` pair in ``_CONFIG_ATTRS_MAPPING`` whose old
    attribute is present on ``config``: the value is copied to the new name
    unless the new name is already set, the old attribute is then deleted,
    and a debug message is logged.

    Args:
        config: The config object to adjust. It is modified in place.

    Returns:
        The same ``config`` object that was passed in.
    """
    for legacy_name, target_name in _CONFIG_ATTRS_MAPPING.items():
        if not hasattr(config, legacy_name):
            continue
        # A value already stored under the target name takes precedence;
        # in that case the legacy attribute is dropped without being copied.
        if not hasattr(config, target_name):
            legacy_value = getattr(config, legacy_name)
            config.update({target_name: legacy_value})
        delattr(config, legacy_name)
        logger.debug("Remapped config attribute '%s' to '%s'", legacy_name,
                     target_name)
    return config
 def get_config(
    model: Union[str, Path],
    trust_remote_code: bool,
@ -361,6 +373,9 @@ def get_config(
                    revision=revision,
                    code_revision=code_revision,
                    token=_get_hf_token(),
                    # some old custom model's config needs
                    # `has_no_defaults_at_init=True` to work.
                    has_no_defaults_at_init=trust_remote_code,
                    **kwargs,
                )
            except ValueError as e:
@ -376,6 +391,7 @@ def get_config(
                    raise RuntimeError(err_msg) from e
                else:
                    raise e
        config = _maybe_remap_hf_config_attrs(config)
    elif config_format == ConfigFormat.MISTRAL:
        config = load_params_config(model, revision, **kwargs)
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@ -11,8 +11,6 @@ from vllm.transformers_utils.configs.exaone import ExaoneConfig
 # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the
 # `FalconConfig` class from the official HuggingFace transformers library.
 from vllm.transformers_utils.configs.falcon import RWConfig
 from vllm.transformers_utils.configs.h2ovl import H2OVLChatConfig
 from vllm.transformers_utils.configs.internvl import InternVLChatConfig
 from vllm.transformers_utils.configs.jais import JAISConfig
 from vllm.transformers_utils.configs.kimi_vl import KimiVLConfig
 from vllm.transformers_utils.configs.medusa import MedusaConfig
@ -38,8 +36,6 @@ __all__ = [
    "DeepseekVLV2Config",
    "MPTConfig",
    "RWConfig",
    "H2OVLChatConfig",
    "InternVLChatConfig",
    "JAISConfig",
    "MedusaConfig",
    "EAGLEConfig",
--- a/vllm/transformers_utils/configs/h2ovl.py
+++ b/vllm/transformers_utils/configs/h2ovl.py
@ -1,16 +0,0 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # Adapted from
 # https://huggingface.co/h2oai/h2ovl-mississippi-2b/blob/main/configuration_h2ovl_chat.py
 # --------------------------------------------------------
 # H2OVL-Mississippi
 # Copyright (c) 2024 H2O.AI
 # Licensed under Apache 2.0 License [see LICENSE for details]
 # --------------------------------------------------------
 from .internvl import InternVLChatConfig
 class H2OVLChatConfig(InternVLChatConfig):
    """``InternVLChatConfig`` subclass that only overrides ``model_type``."""
    model_type = "h2ovl_chat"
--- a/vllm/transformers_utils/configs/internvl.py
+++ b/vllm/transformers_utils/configs/internvl.py
@ -1,54 +0,0 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 # Adapted from
 # https://huggingface.co/OpenGVLab/InternVL2-1B/blob/main/configuration_internvl_chat.py
 # --------------------------------------------------------
 # InternVL
 # Copyright (c) 2024 OpenGVLab
 # Licensed under The MIT License [see LICENSE for details]
 # --------------------------------------------------------
 from transformers.configuration_utils import PretrainedConfig
 class InternVLChatConfig(PretrainedConfig):
    """Composite configuration holding a vision and a text sub-config.

    The ``vision_config`` and ``llm_config`` arguments are expanded as
    keyword arguments into generic ``PretrainedConfig`` objects; the latter
    is stored under the attribute name ``text_config``. All remaining named
    arguments are stored unchanged as attributes of the same name.
    """
    model_type = 'internvl_chat'
    is_composition = True

    def __init__(self,
                 vision_config=None,
                 llm_config=None,
                 use_backbone_lora=0,
                 use_llm_lora=0,
                 select_layer=-1,
                 force_image_size=None,
                 downsample_ratio=0.5,
                 template=None,
                 dynamic_image_size=False,
                 use_thumbnail=False,
                 ps_version='v1',
                 min_dynamic_patch=1,
                 max_dynamic_patch=6,
                 **kwargs):
        """Store the given settings and build both sub-configs.

        ``vision_config`` and ``llm_config`` default to an empty mapping
        when ``None``; extra ``kwargs`` go to ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)

        # Only ``None`` is replaced by an empty mapping; any other value is
        # expanded as-is.
        vision_kwargs = {} if vision_config is None else vision_config
        llm_kwargs = {} if llm_config is None else llm_config

        self.vision_config = PretrainedConfig(**vision_kwargs)
        # The language-model settings are exposed as ``text_config``.
        self.text_config = PretrainedConfig(**llm_kwargs)

        self.use_backbone_lora = use_backbone_lora
        self.use_llm_lora = use_llm_lora
        self.select_layer = select_layer
        self.force_image_size = force_image_size
        self.downsample_ratio = downsample_ratio
        self.template = template
        self.dynamic_image_size = dynamic_image_size
        self.use_thumbnail = use_thumbnail
        # Pixel shuffle version.
        self.ps_version = ps_version
        self.min_dynamic_patch = min_dynamic_patch
        self.max_dynamic_patch = max_dynamic_patch
--- a/vllm/transformers_utils/configs/nvlm_d.py
+++ b/vllm/transformers_utils/configs/nvlm_d.py
@ -8,8 +8,24 @@
 # Copyright (c) 2024 NVIDIA
 # Licensed under Apache 2.0 License [see LICENSE for details]
 # --------------------------------------------------------
-from .internvl import InternVLChatConfig
+from transformers import Qwen2Config
 from transformers.configuration_utils import PretrainedConfig
-class NVLM_D_Config(InternVLChatConfig):
+class NVLM_D_Config(PretrainedConfig):
    """Composite configuration holding a vision and a text sub-config.

    The vision part is a generic ``PretrainedConfig``; the text part is a
    ``Qwen2Config`` built from ``llm_config`` and stored as ``text_config``.
    """
    model_type = 'NVLM_D'
    is_composition = True
    def __init__(self, vision_config=None, llm_config=None, **kwargs):
        """Build both sub-configs from optional keyword mappings.

        Args:
            vision_config: Mapping expanded as keyword arguments into
                ``PretrainedConfig``; ``None`` is treated as empty.
            llm_config: Mapping expanded as keyword arguments into
                ``Qwen2Config``; ``None`` is treated as empty.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)
        # Handle vision_config initialization
        if vision_config is None:
            vision_config = {}
        # Handle llm_config initialization
        if llm_config is None:
            llm_config = {}
        self.vision_config = PretrainedConfig(**vision_config)
        # The language-model settings are exposed under `text_config`.
        self.text_config = Qwen2Config(**llm_config)