mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-26 10:49:38 +08:00
[Frontend] Update the warning log when using VLLM_ALLOW_LONG_MAX_MODEL_LEN (#20904)
Signed-off-by: wang.yuqi <noooop@126.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
parent
d7fbc6ddac
commit
55602bb2e6
@@ -3021,16 +3021,20 @@ def _get_and_verify_max_len(
|
||||
f"User-specified max_model_len ({max_model_len}) is greater "
|
||||
f"than the derived max_model_len ({max_len_key}="
|
||||
f"{derived_max_model_len} or model_max_length="
|
||||
f"{model_max_length} in model's config.json). This may lead "
|
||||
"to incorrect model outputs or CUDA errors.")
|
||||
f"{model_max_length} in model's config.json).")
|
||||
warning = (
|
||||
"VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme "
|
||||
"caution. If the model uses relative position encoding (RoPE), "
|
||||
"positions exceeding derived_max_model_len lead to nan. If the "
|
||||
"model uses absolute position encoding, positions exceeding "
|
||||
"derived_max_model_len will cause a CUDA array out-of-bounds "
|
||||
"error.")
|
||||
if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN:
|
||||
logger.warning(
|
||||
"%s Make sure the value is correct and within the "
|
||||
"model context size.", msg)
|
||||
logger.warning_once("%s %s", msg, warning)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"{msg} To allow overriding this maximum, set "
|
||||
"the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1")
|
||||
f"the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. {warning}")
|
||||
return int(max_model_len)
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user