From 48eeb1ffbad37e30d61c1e2909f141f98fa337a5 Mon Sep 17 00:00:00 2001 From: Xingyu Liu Date: Tue, 9 Dec 2025 11:58:08 -0800 Subject: [PATCH] remove multimodal in model_arch_config Signed-off-by: Xingyu Liu --- tests/config/model_arch_groundtruth.json | 40 +++++++++---------- tests/config/test_model_arch_config.py | 2 - vllm/config/model_arch.py | 3 -- .../model_arch_config_convertor.py | 18 +-------- 4 files changed, 21 insertions(+), 42 deletions(-) diff --git a/tests/config/model_arch_groundtruth.json b/tests/config/model_arch_groundtruth.json index c6d321f6c3257..c3540ab5bdf03 100644 --- a/tests/config/model_arch_groundtruth.json +++ b/tests/config/model_arch_groundtruth.json @@ -13,7 +13,7 @@ "total_num_kv_heads": 0, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.float32" }, "mistralai/Mamba-Codestral-7B-v0.1": { @@ -30,7 +30,7 @@ "total_num_kv_heads": 0, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "ibm-nasa-geospatial/Prithvi-EO-2.0-300M-TL-Sen1Floods11": { @@ -47,7 +47,7 @@ "total_num_kv_heads": 0, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": true, + "supports_multimodal": true, "dtype": "torch.float32" }, "Zyphra/Zamba2-7B-instruct": { @@ -64,7 +64,7 @@ "total_num_kv_heads": 32, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "mosaicml/mpt-7b": { @@ -81,7 +81,7 @@ "total_num_kv_heads": 32, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "databricks/dbrx-instruct": { @@ -98,7 +98,7 @@ "total_num_kv_heads": 8, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "tiiuae/falcon-7b": { @@ -115,7 +115,7 @@ "total_num_kv_heads": 1, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "tiiuae/falcon-40b": { @@ -132,7 +132,7 @@ "total_num_kv_heads": 8, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "luccafong/deepseek_mtp_main_random": { @@ -149,7 +149,7 @@ "total_num_kv_heads": 32, "num_experts": 72, "is_deepseek_mla": true, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "luccafong/deepseek_mtp_draft_random": { @@ -166,7 +166,7 @@ "total_num_kv_heads": 32, "num_experts": 72, "is_deepseek_mla": true, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "Qwen/Qwen3-Next-80B-A3B-Instruct": { @@ -183,7 +183,7 @@ "total_num_kv_heads": 2, "num_experts": 512, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "tiny-random/qwen3-next-moe": { @@ -200,7 +200,7 @@ "total_num_kv_heads": 8, "num_experts": 32, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "zai-org/GLM-4.5": { @@ -217,7 +217,7 @@ "total_num_kv_heads": 8, "num_experts": 160, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "baidu/ERNIE-4.5-21B-A3B-PT": { @@ -234,7 +234,7 @@ "total_num_kv_heads": 4, "num_experts": 64, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "lmsys/gpt-oss-20b-bf16": { @@ -251,7 +251,7 @@ "total_num_kv_heads": 8, "num_experts": 32, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "deepseek-ai/DeepSeek-V3.2-Exp": { @@ -268,7 +268,7 @@ "total_num_kv_heads": 128, "num_experts": 256, "is_deepseek_mla": true, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "meta-llama/Llama-4-Scout-17B-16E-Instruct": { @@ -285,7 +285,7 @@ "total_num_kv_heads": 8, "num_experts": 16, "is_deepseek_mla": false, - "is_multimodal_model": true, + "supports_multimodal": true, "dtype": "torch.bfloat16" }, "nvidia/Llama-3_3-Nemotron-Super-49B-v1": { @@ -302,7 +302,7 @@ "total_num_kv_heads": 8, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "XiaomiMiMo/MiMo-7B-RL": { @@ -319,7 +319,7 @@ "total_num_kv_heads": 8, "num_experts": 0, "is_deepseek_mla": false, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.bfloat16" }, "meituan-longcat/LongCat-Flash-Chat": { @@ -336,7 +336,7 @@ "total_num_kv_heads": 64, "num_experts": 512, "is_deepseek_mla": true, - "is_multimodal_model": false, + "supports_multimodal": false, "dtype": "torch.float32" } } diff --git a/tests/config/test_model_arch_config.py b/tests/config/test_model_arch_config.py index 365cc1104ccaf..90c550de0e3e5 100644 --- a/tests/config/test_model_arch_config.py +++ b/tests/config/test_model_arch_config.py @@ -64,8 +64,6 @@ def test_model_arch_config(): assert model_arch_config.total_num_kv_heads == expected["total_num_kv_heads"] assert model_arch_config.num_experts == expected["num_experts"] assert model_arch_config.is_deepseek_mla == expected["is_deepseek_mla"] - assert model_arch_config.is_multimodal_model == expected["is_multimodal_model"] - dtype = model_arch_config.torch_dtype assert str(dtype) == expected["dtype"] diff --git a/vllm/config/model_arch.py b/vllm/config/model_arch.py index a35288c2330bf..6d9e32a24c5c8 100644 --- a/vllm/config/model_arch.py +++ b/vllm/config/model_arch.py @@ -53,9 +53,6 @@ class ModelArchitectureConfig: torch_dtype: torch.dtype | str | None """PyTorch data type for model weights (e.g., 'float16', 'bfloat16').""" - is_multimodal_model: bool - """Whether the model is a multimodal model.""" - is_deepseek_mla: bool """Whether the model is a DeepSeek MLA model.""" diff --git a/vllm/transformers_utils/model_arch_config_convertor.py b/vllm/transformers_utils/model_arch_config_convertor.py index d1e28cbe558bb..d453a2395e66c 100644 --- a/vllm/transformers_utils/model_arch_config_convertor.py +++ b/vllm/transformers_utils/model_arch_config_convertor.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import TYPE_CHECKING, final +from typing import final import torch from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE @@ -17,17 +17,8 @@ from vllm.transformers_utils.config import ( get_hf_text_config, try_get_safetensors_metadata, ) -from vllm.utils.import_utils import LazyLoader from vllm.utils.torch_utils import common_broadcastable_dtype -if TYPE_CHECKING: - import vllm.model_executor.models.registry as me_models_registry -else: - # Use lazy loading to avoid circular import - me_models_registry = LazyLoader( - "model_executor", globals(), "vllm.model_executor.models.registry" - ) - logger = init_logger(__name__) @@ -248,12 +239,6 @@ class ModelArchConfigConvertorBase: derived_max_model_len = tmp_max_len return derived_max_model_len, max_len_key - def is_multimodal_model(self) -> bool: - return any( - multi_model_arch in self.hf_config.architectures - for multi_model_arch in me_models_registry._MULTIMODAL_MODELS - ) - def convert(self, model_id: str, revision: str | None) -> ModelArchitectureConfig: model_arch_config = ModelArchitectureConfig( architectures=getattr(self.hf_config, "architectures", []), @@ -268,7 +253,6 @@ class ModelArchConfigConvertorBase: num_experts=self.get_num_experts(self.hf_text_config), quantization_config=self.get_quantization_config(self.hf_config), torch_dtype=self.get_torch_dtype(self.hf_config, model_id, revision), - is_multimodal_model=self.is_multimodal_model(), is_deepseek_mla=self.is_deepseek_mla(), derived_max_model_len_and_key=self.derive_max_model_len_and_key(), )