diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 8cb814a6d053f..affe295e747b8 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2185,7 +2185,7 @@ class GPUModelRunner(
         # pos_info.length may overcount (e.g., special tokens in Qwen-VL).
         # Fall back to length if is_embed is None.
         num_tokens = self.info.get_num_mm_encoder_tokens(  # type: ignore[attr-defined]
-            pos_info.get_num_embeds()
+            pos_info.get_num_embeds()
         )
         prompt_lora_mapping.append(lora_id)
         token_lora_mapping.extend([lora_id] * num_tokens)