Improve Transformers backend model loading QoL (#17039)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-01-28 05:37:14 +08:00 · 2025-04-23 15:33:51 +01:00 · 2025-04-23 15:33:51 +01:00 · 8e630d680e
commit 8e630d680e
parent af869f6dff
1 changed files with 8 additions and 5 deletions
--- a/vllm/model_executor/model_loader/utils.py
+++ b/vllm/model_executor/model_loader/utils.py
@@ -55,7 +55,10 @@ def resolve_transformers_arch(model_config: ModelConfig,
        #     "AutoModelFor<Task>": "<your-repo-name>--<config-name>",
        # },
        auto_modules = {
-            name: get_class_from_dynamic_module(module, model_config.model)
+            name:
+            get_class_from_dynamic_module(module,
+                                          model_config.model,
+                                          revision=model_config.revision)
            for name, module in sorted(auto_map.items(), key=lambda x: x[0])
        }
        custom_model_module = auto_modules.get("AutoModel")
@@ -97,10 +100,10 @@ def get_model_architecture(
        architectures = ["QuantMixtralForCausalLM"]

    vllm_supported_archs = ModelRegistry.get_supported_archs()
-    is_vllm_supported = any(arch in vllm_supported_archs
-                            for arch in architectures)
-    if (not is_vllm_supported
-            or model_config.model_impl == ModelImpl.TRANSFORMERS):
+    vllm_not_supported = not any(arch in vllm_supported_archs
+                                 for arch in architectures)
+    if (model_config.model_impl == ModelImpl.TRANSFORMERS or
+            model_config.model_impl != ModelImpl.VLLM and vllm_not_supported):
        architectures = resolve_transformers_arch(model_config, architectures)

    model_cls, arch = ModelRegistry.resolve_model_cls(architectures)