From 166a168b0fa606608bbd3b4be1ab5d904a4e3927 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Mon, 17 Mar 2025 21:14:32 +0800
Subject: [PATCH] [Doc] Fix misleading log during multi-modal profiling (#14955)

Signed-off-by: DarkLight1337
---
 vllm/multimodal/profiling.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py
index b791fb83478fc..62b75afe8de1b 100644
--- a/vllm/multimodal/profiling.py
+++ b/vllm/multimodal/profiling.py
@@ -218,8 +218,10 @@ class MultiModalProfiler(Generic[_I]):
 
         # V0 does not support chunked prefill.
         if total_len > seq_len and not envs.VLLM_USE_V1:
+            # `max_num_batched_tokens` is defined by `SchedulerConfig`
             logger.warning(
-                "The context length (%d) of the model is too short "
+                "The sequence length used for profiling ("
+                "max_num_batched_tokens / max_num_seqs = %d) is too short "
                 "to hold the multi-modal embeddings in the worst case "
                 "(%d tokens in total, out of which %s are reserved for "
                 "multi-modal embeddings). This may cause certain "