Improve static type checking in LoRAModelRunnerMixin (#17104)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-12-14 05:45:01 +08:00 · 2025-04-24 14:14:47 +01:00 · 2025-04-24 14:14:47 +01:00 · 21f4f1c9a4
commit 21f4f1c9a4
parent b0c1f6202d
1 changed files with 6 additions and 10 deletions
--- a/vllm/v1/worker/lora_model_runner_mixin.py
+++ b/vllm/v1/worker/lora_model_runner_mixin.py
@@ -28,20 +28,16 @@ class LoRAModelRunnerMixin:
                        scheduler_config: SchedulerConfig,
                        lora_config: LoRAConfig, device: str) -> nn.Module:
-        assert supports_lora(
+        if not supports_lora(model):
-            model), f"{model.__class__.__name__} does not support LoRA yet."
+            raise ValueError(
+                f"{model.__class__.__name__} does not support LoRA yet.")
        if supports_multimodal(model):
            logger.warning("Regarding multimodal models, vLLM currently "
                           "only supports adding LoRA to language model.")
-        # It's necessary to distinguish between the max_position_embeddings
+        # Use get_text_config() in case of multimodal models
-        # of VLMs and LLMs.
+        text_config = model_config.hf_config.get_text_config()
-        if hasattr(model.config, "max_position_embeddings"):
-            max_pos_embeddings = model.config.max_position_embeddings
-        else:
-            max_pos_embeddings = (
-                model.config.text_config.max_position_embeddings)
        # Add LoRA Manager to the Model Runner
        self.lora_manager = LRUCacheWorkerLoRAManager(
@@ -52,7 +48,7 @@ class LoRAModelRunnerMixin:
            device,
            model.embedding_modules,
            model.embedding_padding_modules,
-            max_position_embeddings=max_pos_embeddings,
+            max_position_embeddings=text_config.max_position_embeddings,
        )
        return self.lora_manager.create_lora_manager(model)