From 16be10168c6bf1f72b4a566cf49cdd10dbbdcf1b Mon Sep 17 00:00:00 2001
From: yurekami <yurekami@users.noreply.github.com>
Date: Wed, 24 Dec 2025 23:34:07 +0900
Subject: [PATCH] fix(config): validate skip_tokenizer_init is not used with
 multimodal models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add early validation to detect when skip_tokenizer_init=True is used
with multimodal models like Gemma3. This combination is not supported
because multimodal processors require a tokenizer for initialization.
The only exception is enable_mm_embeds=True, in which case the check
is skipped.

Previously, this combination failed with a confusing error raised deep
inside the transformers Gemma3Processor initialization:

    AttributeError: 'NoneType' object has no attribute 'image_token_id'

Now users get a clear error message explaining the incompatibility.

Fixes #31123

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: yurekami <yurekami@users.noreply.github.com>
---
 vllm/config/model.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/vllm/config/model.py b/vllm/config/model.py
index a730aa8ad1b9c..8eff5c84ef590 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -588,6 +588,19 @@ class ModelConfig:
                 "repo name or path using the --tokenizer argument."
             )
 
+        # Multimodal models require a tokenizer for processor initialization
+        # unless embedded inputs are enabled (enable_mm_embeds=True)
+        if self.skip_tokenizer_init and self.is_multimodal_model:
+            mm_config = getattr(self, "multimodal_config", None)
+            if mm_config is None or not mm_config.enable_mm_embeds:
+                raise ValueError(
+                    "Multimodal models require a tokenizer for processing. "
+                    "Please set skip_tokenizer_init=False when using multimodal "
+                    f"models like {self.model}. Alternatively, enable embedded "
+                    "inputs with enable_mm_embeds=True if your inputs are "
+                    "pre-embedded."
+                )
+
         if self.disable_sliding_window:
             # Set after get_and_verify_max_len to ensure that max_model_len
             # can be correctly capped to sliding window size