From 8865da157bea1b25afc4ff28031858d557363290 Mon Sep 17 00:00:00 2001
From: "sangho.lee"
Date: Wed, 15 Oct 2025 02:13:59 -0500
Subject: [PATCH] [Bugfix][Multi Modal] Fix incorrect Molmo token processing
 (#26873)

Signed-off-by: sanghol
---
 vllm/model_executor/models/molmo.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py
index 106aaf413e99b..dce94d181c4cd 100644
--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -1264,13 +1264,16 @@ class MolmoMultiModalProcessor(BaseMultiModalProcessor[MolmoProcessingInfo]):
     ) -> list[int]:
         processor = self.info.get_hf_processor()
 
-        # Apply the chat template to the tokens
+        # The chat template is already applied to the prompt tokens
+        # Use message_format="none" to avoid applying it again
+        # Prepend an empty space if `always_start_with_space` is True
         tokens = processor.processor.get_tokens_input(  # type: ignore
             self.info.get_tokenizer().decode(prompt_tokens),
-            message_format=processor.message_format,
+            message_format="none",
             always_start_with_space=processor.always_start_with_space,
         )
 
+        # Prepend a BOS token id to the tokens
         processed_data = self.info.ctx.call_hf_processor(
             processor,  # type: ignore
             dict(tokens=tokens),