From a4ed7315460ee2610bb0afc49686f72ad59ef57b Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Mon, 28 Jul 2025 17:15:31 +0800
Subject: [PATCH] [Model] Prioritize Transformers fallback over suffix matching
 (#21719)

Signed-off-by: DarkLight1337
---
 .../multimodal/generation/test_common.py      |  3 +-
 vllm/model_executor/models/registry.py        | 58 ++++++++++++-------
 2 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index c3094b0f6461..5bff615fb107 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -222,6 +222,7 @@ VLM_TEST_SETTINGS = {
         },
         marks=[large_gpu_mark(min_gb=32)],
     ),
+    # Check "auto" with fallback to transformers
     "internvl-transformers": VLMTestInfo(
         models=["OpenGVLab/InternVL3-1B-hf"],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
@@ -231,7 +232,7 @@ VLM_TEST_SETTINGS = {
         use_tokenizer_eos=True,
         image_size_factors=[(0.25, 0.5, 1.0)],
         vllm_runner_kwargs={
-            "model_impl": "transformers",
+            "model_impl": "auto",
         },
         auto_cls=AutoModelForImageTextToText,
         marks=[pytest.mark.core_model],
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index 179d5e324da9..5e3a39a6cdd2 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -586,18 +586,6 @@ class _ModelRegistry:
 
         return architecture
 
-    def _normalize_archs(
-        self,
-        architectures: list[str],
-        model_config: ModelConfig,
-    ) -> list[str]:
-        if not architectures:
-            logger.warning("No model architectures are specified")
-
-        return [
-            self._normalize_arch(arch, model_config) for arch in architectures
-        ]
-
     def inspect_model_cls(
         self,
         architectures: Union[str, list[str]],
@@ -605,8 +593,8 @@
     ) -> tuple[_ModelInfo, str]:
         if isinstance(architectures, str):
             architectures = [architectures]
-
-        normalized_archs = self._normalize_archs(architectures, model_config)
+        if not architectures:
+            raise ValueError("No model architectures are specified")
 
         # Require transformers impl
         if model_config.model_impl == ModelImpl.TRANSFORMERS:
@@ -617,13 +605,26 @@
                 if model_info is not None:
                     return (model_info, arch)
 
-        for arch, normalized_arch in zip(architectures, normalized_archs):
+        # Fallback to transformers impl (after resolving convert_type)
+        if (all(arch not in self.models for arch in architectures)
+                and model_config.model_impl == ModelImpl.AUTO
+                and getattr(model_config, "convert_type", "none") == "none"):
+            arch = self._try_resolve_transformers(architectures[0],
+                                                  model_config)
+            if arch is not None:
+                model_info = self._try_inspect_model_cls(arch)
+                if model_info is not None:
+                    return (model_info, arch)
+
+        for arch in architectures:
+            normalized_arch = self._normalize_arch(arch, model_config)
             model_info = self._try_inspect_model_cls(normalized_arch)
             if model_info is not None:
                 return (model_info, arch)
 
-        # Fallback to transformers impl
-        if model_config.model_impl in (ModelImpl.AUTO, ModelImpl.TRANSFORMERS):
+        # Fallback to transformers impl (before resolving runner_type)
+        if (all(arch not in self.models for arch in architectures)
+                and model_config.model_impl == ModelImpl.AUTO):
             arch = self._try_resolve_transformers(architectures[0],
                                                   model_config)
             if arch is not None:
@@ -640,8 +641,8 @@
     ) -> tuple[type[nn.Module], str]:
         if isinstance(architectures, str):
            architectures = [architectures]
-
-        normalized_archs = self._normalize_archs(architectures, model_config)
+        if not architectures:
+            raise ValueError("No model architectures are specified")
 
         # Require transformers impl
         if model_config.model_impl == ModelImpl.TRANSFORMERS:
@@ -652,13 +653,26 @@
                 if model_cls is not None:
                     return (model_cls, arch)
 
-        for arch, normalized_arch in zip(architectures, normalized_archs):
+        # Fallback to transformers impl (after resolving convert_type)
+        if (all(arch not in self.models for arch in architectures)
+                and model_config.model_impl == ModelImpl.AUTO
+                and getattr(model_config, "convert_type", "none") == "none"):
+            arch = self._try_resolve_transformers(architectures[0],
+                                                  model_config)
+            if arch is not None:
+                model_cls = self._try_load_model_cls(arch)
+                if model_cls is not None:
+                    return (model_cls, arch)
+
+        for arch in architectures:
+            normalized_arch = self._normalize_arch(arch, model_config)
             model_cls = self._try_load_model_cls(normalized_arch)
             if model_cls is not None:
                 return (model_cls, arch)
 
-        # Fallback to transformers impl
-        if model_config.model_impl in (ModelImpl.AUTO, ModelImpl.TRANSFORMERS):
+        # Fallback to transformers impl (before resolving runner_type)
+        if (all(arch not in self.models for arch in architectures)
+                and model_config.model_impl == ModelImpl.AUTO):
             arch = self._try_resolve_transformers(architectures[0],
                                                   model_config)
             if arch is not None: