mirror of https://git.datalinker.icu/vllm-project/vllm.git
Print the warning only once (#16193)
Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
commit ad434d4cfe (parent 66d433b94f)
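For context: `logger.warning_once` suppresses repeated emissions of the same warning, so the profiler message below is printed at most once per distinct message instead of on every profiling pass. The diff does not show vLLM's actual implementation; a minimal sketch of the idea, assuming a cache keyed on the message string (the standalone `warning_once` name here is hypothetical):

```python
import functools
import logging

logger = logging.getLogger("vllm")

@functools.lru_cache(maxsize=None)
def warning_once(msg: str) -> None:
    """Log `msg` at WARNING level, but only the first time it is seen."""
    # lru_cache memoizes on the message string, so an identical message
    # hits the cache on every later call instead of being logged again.
    logger.warning(msg)
```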
@@ -216,17 +216,18 @@ class MultiModalProfiler(Generic[_I]):
         # Encoder-decoder multimodal models only support v0
         if total_len > seq_len:
             # `max_num_batched_tokens` is defined by `SchedulerConfig`
-            logger.warning(
+            logger.warning_once(
                 "The encoder sequence length used for profiling ("
-                "max_num_batched_tokens / max_num_seqs = %d) is too short "
+                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
+                " is too short "
                 "to hold the multi-modal embeddings in the worst case "
-                "(%d tokens in total, out of which %s are reserved for "
+                f"({total_len} tokens in total, out of which "
+                f"{total_placeholders_by_modality} are reserved for "
                 "multi-modal embeddings). This may cause certain "
                 "multi-modal inputs to fail during inference, even when "
                 "the input text is short. To avoid this, you should "
                 "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.", seq_len, total_len,
-                total_placeholders_by_modality)
+                "and/or reduce `mm_counts`.")

         processor = cast(EncDecMultiModalProcessor, self.processor)
         if processor.pad_dummy_encoder_prompt:
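Note why the `%`-style lazy arguments become f-strings: `logger.warning` defers formatting to the logging framework, but a once-only wrapper needs the fully interpolated message up front, since (in a cache-based implementation like the sketch above) deduplication happens per distinct message string:

```python
for _ in range(3):
    warning_once("profiling sequence length is too short")  # logged once

# Different interpolated values yield different cache keys, so each
# distinct message is logged once on its own.
warning_once(f"max_num_batched_tokens / max_num_seqs = {512}")
warning_once(f"max_num_batched_tokens / max_num_seqs = {1024}")
```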
@@ -251,17 +252,18 @@ class MultiModalProfiler(Generic[_I]):
         # V0 does not support chunked prefill.
         if total_len > seq_len and not envs.VLLM_USE_V1:
             # `max_num_batched_tokens` is defined by `SchedulerConfig`
-            logger.warning(
+            logger.warning_once(
                 "The sequence length used for profiling ("
-                "max_num_batched_tokens / max_num_seqs = %d) is too short "
+                f"max_num_batched_tokens / max_num_seqs = {seq_len}) "
+                "is too short "
                 "to hold the multi-modal embeddings in the worst case "
-                "(%d tokens in total, out of which %s are reserved for "
+                f"({total_len} tokens in total, out of which "
+                f"{total_placeholders_by_modality} are reserved for "
                 "multi-modal embeddings). This may cause certain "
                 "multi-modal inputs to fail during inference, even when "
                 "the input text is short. To avoid this, you should "
                 "increase `max_model_len`, reduce `max_num_seqs`, "
-                "and/or reduce `mm_counts`.", seq_len, total_len,
-                total_placeholders_by_modality)
+                "and/or reduce `mm_counts`.")

         if total_len < seq_len:
             prompt_token_ids.extend([0] * (seq_len - total_len))
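The unchanged context at the end of this hunk pads the dummy prompt up to the profiling sequence length when it falls short. A worked example with illustrative values (the token ids are made up):

```python
seq_len, total_len = 8, 5
prompt_token_ids = [101, 7592, 2088, 102, 0]  # hypothetical dummy tokens

if total_len < seq_len:
    # Append zeros until the prompt reaches the profiling length.
    prompt_token_ids.extend([0] * (seq_len - total_len))

assert len(prompt_token_ids) == seq_len  # [101, 7592, 2088, 102, 0, 0, 0, 0]
```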