diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index b1f4e3e2a9831..771f16fe05106 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -81,7 +81,6 @@ _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = LazyConfigDict(
     flex_olmo="FlexOlmoConfig",
     kimi_linear="KimiLinearConfig",
     kimi_vl="KimiVLConfig",
-    Llama_Nemotron_Nano_VL="Nemotron_Nano_VL_Config",
     RefinedWeb="RWConfig",  # For tiiuae/falcon-40b(-instruct)
     RefinedWebModel="RWConfig",  # For tiiuae/falcon-7b(-instruct)
     jais="JAISConfig",
@@ -106,6 +105,7 @@ _CONFIG_ATTRS_MAPPING: dict[str, str] = {
 
 _AUTO_CONFIG_KWARGS_OVERRIDES: dict[str, dict[str, Any]] = {
     "internvl_chat": {"has_no_defaults_at_init": True},
+    "Llama_Nemotron_Nano_VL": {"attn_implementation": "eager"},
     "NVLM_D": {"has_no_defaults_at_init": True},
 }
 
diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py
index 663a8e44d71dd..405a2f6b23954 100644
--- a/vllm/transformers_utils/configs/__init__.py
+++ b/vllm/transformers_utils/configs/__init__.py
@@ -28,7 +28,6 @@ from vllm.transformers_utils.configs.mlp_speculator import MLPSpeculatorConfig
 from vllm.transformers_utils.configs.moonvit import MoonViTConfig
 from vllm.transformers_utils.configs.nemotron import NemotronConfig
 from vllm.transformers_utils.configs.nemotron_h import NemotronHConfig
-from vllm.transformers_utils.configs.nemotron_vl import Nemotron_Nano_VL_Config
 from vllm.transformers_utils.configs.olmo3 import Olmo3Config
 from vllm.transformers_utils.configs.ovis import OvisConfig
 from vllm.transformers_utils.configs.qwen3_next import Qwen3NextConfig
@@ -59,7 +58,6 @@ __all__ = [
     "KimiVLConfig",
     "NemotronConfig",
     "NemotronHConfig",
-    "Nemotron_Nano_VL_Config",
     "Olmo3Config",
     "OvisConfig",
     "RadioConfig",
diff --git a/vllm/transformers_utils/configs/nemotron_vl.py b/vllm/transformers_utils/configs/nemotron_vl.py
deleted file mode 100644
index 6f98fbafbed5f..0000000000000
--- a/vllm/transformers_utils/configs/nemotron_vl.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-# ruff: noqa: E501
-# Adapted from
-# https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1/blob/main/configuration.py
-# --------------------------------------------------------
-# Adapted from https://huggingface.co/OpenGVLab/InternVL2-Llama3-76B under MIT License
-# LICENSE is in incl_licenses directory.
-# --------------------------------------------------------
-
-from transformers import LlamaConfig
-from transformers.configuration_utils import PretrainedConfig
-from transformers.dynamic_module_utils import get_class_from_dynamic_module
-
-
-class Nemotron_Nano_VL_Config(PretrainedConfig):
-    model_type = "Llama_Nemotron_Nano_VL"
-    is_composition = True
-
-    def __init__(
-        self,
-        vision_config=None,
-        llm_config=None,
-        force_image_size=None,
-        downsample_ratio=0.5,
-        template=None,
-        ps_version="v1",
-        image_tag_type="internvl",
-        projector_hidden_size=4096,
-        vit_hidden_size=1280,
-        **kwargs,
-    ):
-        super().__init__(**kwargs)
-
-        if vision_config is not None:
-            assert (
-                "auto_map" in vision_config
-                and "AutoConfig" in vision_config["auto_map"]
-            )
-            vision_auto_config = get_class_from_dynamic_module(
-                *vision_config["auto_map"]["AutoConfig"].split("--")[::-1]
-            )
-            self.vision_config = vision_auto_config(**vision_config)
-        else:
-            self.vision_config = PretrainedConfig()
-
-        if llm_config is None:
-            self.text_config = LlamaConfig()
-        else:
-            self.text_config = LlamaConfig(**llm_config)
-
-        # Assign configuration values
-        self.force_image_size = force_image_size
-        self.downsample_ratio = downsample_ratio
-        self.template = template  # TODO move out of here and into the tokenizer
-        self.ps_version = ps_version  # Pixel shuffle version
-        self.image_tag_type = image_tag_type  # TODO: into the tokenizer too?
-        self.projector_hidden_size = projector_hidden_size
-        self.vit_hidden_size = vit_hidden_size