mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 13:44:58 +08:00
vllm fix check on max vocab size (#22471)
Signed-off-by: Roger Wang <hey@rogerw.io> Signed-off-by: Roger Wang <hey@rogerw.me> Co-authored-by: Roger Wang <hey@rogerw.io> Co-authored-by: Roger Wang <hey@rogerw.me>
This commit is contained in:
parent
9701352e4b
commit
81eea3d348
@@ -470,7 +470,19 @@ class Processor:
        else:
            tokenizer = self.tokenizer.get_lora_tokenizer(lora_request)
            max_input_id = max(prompt_ids, default=0)

            # NOTE: tokenizer.max_token_id is the tokenizer's vocab size while
            # self.model_config.get_vocab_size() is the model's vocab size.
            # For Qwen3 models, the language model has extra tokens that do
            # not exist in the tokenizer, and vice versa for multimodal
            # placeholder tokens in some multimodal models.
            # See https://github.com/QwenLM/Qwen3/issues/29#issuecomment-1933720399  # noqa: E501
            # and https://github.com/vllm-project/vllm/pull/22471#discussion_r2312251421  # noqa: E501

            # Here we take the max of the two to determine if a token id is
            # truly out-of-vocabulary.
            if max_input_id > max(tokenizer.max_token_id,
                                  self.model_config.get_vocab_size() - 1):
                raise ValueError(
                    f"Token id {max_input_id} is out of vocabulary")
|||||||
Loading…
x
Reference in New Issue
Block a user