From 81eea3d348c26fb1e6ff0185ad109aedd60a28a2 Mon Sep 17 00:00:00 2001
From: Xiaodong Wang
Date: Sun, 31 Aug 2025 05:57:05 -0700
Subject: [PATCH] vllm fix check on max vocab size (#22471)

Signed-off-by: Roger Wang
Signed-off-by: Roger Wang
Co-authored-by: Roger Wang
Co-authored-by: Roger Wang
---
 vllm/v1/engine/processor.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py
index 6d54c92e2dd1..1aa117ded4ed 100644
--- a/vllm/v1/engine/processor.py
+++ b/vllm/v1/engine/processor.py
@@ -470,7 +470,19 @@ class Processor:
         else:
             tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
         max_input_id = max(prompt_ids, default=0)
-        if max_input_id > tokenizer.max_token_id:
+
+        # NOTE: tokenizer.max_token_id is the tokenizer's vocab size while
+        # self.model_config.get_vocab_size() is the model's vocab size.
+        # For Qwen3 models, the language model has extra tokens that do
+        # not exist in the tokenizer, and vice versa for multimodal
+        # placeholder tokens in some multimodal models.
+        # See https://github.com/QwenLM/Qwen3/issues/29#issuecomment-1933720399 # noqa: E501
+        # and https://github.com/vllm-project/vllm/pull/22471#discussion_r2312251421 # noqa: E501
+
+        # Here we take the max of the two to determine if a token id is
+        # truly out-of-vocabulary.
+        if max_input_id > max(tokenizer.max_token_id,
+                              self.model_config.get_vocab_size() - 1):
             raise ValueError(
                 f"Token id {max_input_id} is out of vocabulary")
 
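
For context, the check added above treats a token id as out-of-vocabulary only
if it exceeds BOTH the tokenizer's highest token id and the model's
embedding-table bound (get_vocab_size() - 1), since either vocabulary may
legitimately contain ids the other does not. Below is a minimal standalone
sketch of that logic; the helper name validate_prompt_ids, its parameters, and
the numeric vocab sizes are illustrative assumptions for this note, not part
of the patch or of vLLM's API.

def validate_prompt_ids(prompt_ids: list[int],
                        tokenizer_max_token_id: int,
                        model_vocab_size: int) -> None:
    """Raise if any prompt token id falls outside both vocabularies.

    tokenizer_max_token_id and model_vocab_size stand in for
    tokenizer.max_token_id and self.model_config.get_vocab_size()
    in the patched code.
    """
    max_input_id = max(prompt_ids, default=0)
    # A token id is only truly out-of-vocabulary if it exceeds both the
    # tokenizer's highest token id and the model's last embedding row.
    if max_input_id > max(tokenizer_max_token_id, model_vocab_size - 1):
        raise ValueError(f"Token id {max_input_id} is out of vocabulary")

# Example (illustrative numbers): a Qwen3-style setup where the model's
# embedding table is larger than the tokenizer's vocabulary. Token id
# 151936 exceeds the tokenizer's max id but is still a valid model input,
# so this call passes; under the old check it would have raised.
validate_prompt_ids([1, 151936],
                    tokenizer_max_token_id=151643,
                    model_vocab_size=152064)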