From a3e8611da5744b1f64f3c4be063bf4a7aed952f0 Mon Sep 17 00:00:00 2001
From: Shanshan Shen <467638484@qq.com>
Date: Mon, 27 Oct 2025 18:16:20 +0800
Subject: [PATCH] [Bugfix] Limit the default value of `max_model_len` when it is not specified by users (#27556)

Signed-off-by: shen-shanshan <467638484@qq.com>
---
 vllm/config/model.py        | 17 +++++------------
 vllm/platforms/interface.py |  7 +++++++
 vllm/platforms/tpu.py       | 16 ++++++++++++++++
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/vllm/config/model.py b/vllm/config/model.py
index f81d324d8f80..b32d820edd7b 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -2112,20 +2112,13 @@ def _get_and_verify_max_len(
     if encoder_config and "max_seq_length" in encoder_config:
         derived_max_model_len = encoder_config["max_seq_length"]
 
-    # If the user specified a max length, make sure it is smaller than the
-    # derived length from the HF model config.
+    # If the user didn't specify `max_model_len`, use the value derived from
+    # the model config as the default.
     if max_model_len is None:
         max_model_len = int(derived_max_model_len)
-        if current_platform.is_tpu():
-            logger.warning(
-                "--max-model-len is not specified, "
-                "it's currently using model's default length %s, "
-                "which might be too large."
-                "Please input with --max-model-len based on your "
-                "request input length and output length, to avoid "
-                "unnecessary degradation.",
-                max_model_len,
-            )
+        max_model_len = current_platform.check_max_model_len(max_model_len)
+    # If the user specified a max length, make sure it is smaller than the
+    # derived length from the HF model config.
     elif max_model_len > derived_max_model_len:
         # Some models might have a separate key for specifying model_max_length
         # that will be bigger than derived_max_model_len. We compare user input
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 1fb3aba9b1f7..446282956439 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -608,6 +608,13 @@ class Platform:
         """
         return None
 
+    @classmethod
+    def check_max_model_len(cls, max_model_len: int) -> int:
+        """
+        Check and possibly adjust `max_model_len` for the current platform.
+        """
+        return max_model_len
+
 
 class UnspecifiedPlatform(Platform):
     _enum = PlatformEnum.UNSPECIFIED
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index ab752f438f72..0a14ee011f7f 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -251,6 +251,22 @@ class TpuPlatform(Platform):
     def use_sync_weight_loader(cls) -> bool:
         return True
 
+    @classmethod
+    def check_max_model_len(cls, max_model_len: int) -> int:
+        """
+        Warn when `max_model_len` falls back to the model's default length.
+        """
+        logger.warning(
+            "--max-model-len was not specified, "
+            "so the model's default length %d is being used, "
+            "which might be too large. "
+            "Please set --max-model-len based on your expected "
+            "request input and output lengths to avoid "
+            "unnecessary degradation.",
+            max_model_len,
+        )
+        return max_model_len
+
 
 try:
     from tpu_inference.platforms import TpuPlatform as TpuInferencePlatform
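
Note (reviewer sketch, not part of the patch): the new `check_max_model_len` hook lets a platform adjust the default derived from the model config rather than only warn about it, as TpuPlatform does above. Below is a minimal sketch of such an override; the `FooPlatform` name and the `_DEFAULT_LEN_CAP` value are hypothetical, and a real platform subclass would also define `_enum`, `device_name`, and the other `Platform` attributes.

import logging

from vllm.platforms.interface import Platform

logger = logging.getLogger(__name__)

# Assumed cap for illustration only; not a value taken from the patch.
_DEFAULT_LEN_CAP = 32768


class FooPlatform(Platform):
    @classmethod
    def check_max_model_len(cls, max_model_len: int) -> int:
        # Clamp the default derived from the model config instead of only
        # warning, so the engine does not default to an overly large context.
        if max_model_len > _DEFAULT_LEN_CAP:
            logger.warning(
                "--max-model-len was not specified; capping the model's "
                "default length %d to %d.",
                max_model_len,
                _DEFAULT_LEN_CAP,
            )
            return _DEFAULT_LEN_CAP
        return max_model_len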