mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 01:45:01 +08:00
[Model] Prioritize Transformers fallback over suffix matching (#21719)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
parent
d128d0d554
commit
a4ed731546
@@ -222,6 +222,7 @@ VLM_TEST_SETTINGS = {
|
||||
},
|
||||
marks=[large_gpu_mark(min_gb=32)],
|
||||
),
|
||||
# Check "auto" with fallback to transformers
|
||||
"internvl-transformers": VLMTestInfo(
|
||||
models=["OpenGVLab/InternVL3-1B-hf"],
|
||||
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
||||
@@ -231,7 +232,7 @@ VLM_TEST_SETTINGS = {
|
||||
use_tokenizer_eos=True,
|
||||
image_size_factors=[(0.25, 0.5, 1.0)],
|
||||
vllm_runner_kwargs={
|
||||
"model_impl": "transformers",
|
||||
"model_impl": "auto",
|
||||
},
|
||||
auto_cls=AutoModelForImageTextToText,
|
||||
marks=[pytest.mark.core_model],
|
||||
|
||||
@@ -586,18 +586,6 @@ class _ModelRegistry:
|
||||
|
||||
return architecture
|
||||
|
||||
def _normalize_archs(
|
||||
self,
|
||||
architectures: list[str],
|
||||
model_config: ModelConfig,
|
||||
) -> list[str]:
|
||||
if not architectures:
|
||||
logger.warning("No model architectures are specified")
|
||||
|
||||
return [
|
||||
self._normalize_arch(arch, model_config) for arch in architectures
|
||||
]
|
||||
|
||||
def inspect_model_cls(
|
||||
self,
|
||||
architectures: Union[str, list[str]],
|
||||
@@ -605,8 +593,8 @@ class _ModelRegistry:
|
||||
) -> tuple[_ModelInfo, str]:
|
||||
if isinstance(architectures, str):
|
||||
architectures = [architectures]
|
||||
|
||||
normalized_archs = self._normalize_archs(architectures, model_config)
|
||||
if not architectures:
|
||||
raise ValueError("No model architectures are specified")
|
||||
|
||||
# Require transformers impl
|
||||
if model_config.model_impl == ModelImpl.TRANSFORMERS:
|
||||
@@ -617,13 +605,26 @@ class _ModelRegistry:
|
||||
if model_info is not None:
|
||||
return (model_info, arch)
|
||||
|
||||
for arch, normalized_arch in zip(architectures, normalized_archs):
|
||||
# Fallback to transformers impl (after resolving convert_type)
|
||||
if (all(arch not in self.models for arch in architectures)
|
||||
and model_config.model_impl == ModelImpl.AUTO
|
||||
and getattr(model_config, "convert_type", "none") == "none"):
|
||||
arch = self._try_resolve_transformers(architectures[0],
|
||||
model_config)
|
||||
if arch is not None:
|
||||
model_info = self._try_inspect_model_cls(arch)
|
||||
if model_info is not None:
|
||||
return (model_info, arch)
|
||||
|
||||
for arch in architectures:
|
||||
normalized_arch = self._normalize_arch(arch, model_config)
|
||||
model_info = self._try_inspect_model_cls(normalized_arch)
|
||||
if model_info is not None:
|
||||
return (model_info, arch)
|
||||
|
||||
# Fallback to transformers impl
|
||||
if model_config.model_impl in (ModelImpl.AUTO, ModelImpl.TRANSFORMERS):
|
||||
# Fallback to transformers impl (before resolving runner_type)
|
||||
if (all(arch not in self.models for arch in architectures)
|
||||
and model_config.model_impl == ModelImpl.AUTO):
|
||||
arch = self._try_resolve_transformers(architectures[0],
|
||||
model_config)
|
||||
if arch is not None:
|
||||
@@ -640,8 +641,8 @@ class _ModelRegistry:
|
||||
) -> tuple[type[nn.Module], str]:
|
||||
if isinstance(architectures, str):
|
||||
architectures = [architectures]
|
||||
|
||||
normalized_archs = self._normalize_archs(architectures, model_config)
|
||||
if not architectures:
|
||||
raise ValueError("No model architectures are specified")
|
||||
|
||||
# Require transformers impl
|
||||
if model_config.model_impl == ModelImpl.TRANSFORMERS:
|
||||
@@ -652,13 +653,26 @@ class _ModelRegistry:
|
||||
if model_cls is not None:
|
||||
return (model_cls, arch)
|
||||
|
||||
for arch, normalized_arch in zip(architectures, normalized_archs):
|
||||
# Fallback to transformers impl (after resolving convert_type)
|
||||
if (all(arch not in self.models for arch in architectures)
|
||||
and model_config.model_impl == ModelImpl.AUTO
|
||||
and getattr(model_config, "convert_type", "none") == "none"):
|
||||
arch = self._try_resolve_transformers(architectures[0],
|
||||
model_config)
|
||||
if arch is not None:
|
||||
model_cls = self._try_load_model_cls(arch)
|
||||
if model_cls is not None:
|
||||
return (model_cls, arch)
|
||||
|
||||
for arch in architectures:
|
||||
normalized_arch = self._normalize_arch(arch, model_config)
|
||||
model_cls = self._try_load_model_cls(normalized_arch)
|
||||
if model_cls is not None:
|
||||
return (model_cls, arch)
|
||||
|
||||
# Fallback to transformers impl
|
||||
if model_config.model_impl in (ModelImpl.AUTO, ModelImpl.TRANSFORMERS):
|
||||
# Fallback to transformers impl (before resolving runner_type)
|
||||
if (all(arch not in self.models for arch in architectures)
|
||||
and model_config.model_impl == ModelImpl.AUTO):
|
||||
arch = self._try_resolve_transformers(architectures[0],
|
||||
model_config)
|
||||
if arch is not None:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user