From 55602bb2e695108e35501dfcd5890e7664c31495 Mon Sep 17 00:00:00 2001
From: "wang.yuqi"
Date: Mon, 1 Sep 2025 16:50:25 +0800
Subject: [PATCH] [Frontend] Update the warning log when using VLLM_ALLOW_LONG_MAX_MODEL_LEN (#20904)

Signed-off-by: wang.yuqi
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 vllm/config/__init__.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index 484f3986bb301..f53e8b0308853 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -3021,16 +3021,20 @@ def _get_and_verify_max_len(
                 f"User-specified max_model_len ({max_model_len}) is greater "
                 f"than the derived max_model_len ({max_len_key}="
                 f"{derived_max_model_len} or model_max_length="
-                f"{model_max_length} in model's config.json). This may lead "
-                "to incorrect model outputs or CUDA errors.")
+                f"{model_max_length} in model's config.json).")
+            warning = (
+                "VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme "
+                "caution. If the model uses relative position encoding (RoPE), "
+                "positions exceeding derived_max_model_len lead to nan. If the "
+                "model uses absolute position encoding, positions exceeding "
+                "derived_max_model_len will cause a CUDA array out-of-bounds "
+                "error.")
             if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN:
-                logger.warning(
-                    "%s Make sure the value is correct and within the "
-                    "model context size.", msg)
+                logger.warning_once("%s %s", msg, warning)
             else:
                 raise ValueError(
                     f"{msg} To allow overriding this maximum, set "
-                    "the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1")
+                    f"the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. {warning}")
     return int(max_model_len)