From a3e8611da5744b1f64f3c4be063bf4a7aed952f0 Mon Sep 17 00:00:00 2001
From: Shanshan Shen <467638484@qq.com>
Date: Mon, 27 Oct 2025 18:16:20 +0800
Subject: [PATCH] [Bugfix] Limit the default value of `max_model_len` when it is not specified by users (#27556)

Signed-off-by: shen-shanshan <467638484@qq.com>
---
 vllm/config/model.py        | 17 +++++------------
 vllm/platforms/interface.py |  7 +++++++
 vllm/platforms/tpu.py       | 16 ++++++++++++++++
 3 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/vllm/config/model.py b/vllm/config/model.py
index f81d324d8f80..b32d820edd7b 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -2112,20 +2112,13 @@ def _get_and_verify_max_len(
     if encoder_config and "max_seq_length" in encoder_config:
         derived_max_model_len = encoder_config["max_seq_length"]
 
-    # If the user specified a max length, make sure it is smaller than the
-    # derived length from the HF model config.
+    # If the user didn't specify `max_model_len`, use the value derived from
+    # the model config as the default.
     if max_model_len is None:
         max_model_len = int(derived_max_model_len)
-        if current_platform.is_tpu():
-            logger.warning(
-                "--max-model-len is not specified, "
-                "it's currently using model's default length %s, "
-                "which might be too large."
-                "Please input with --max-model-len based on your "
-                "request input length and output length, to avoid "
-                "unnecessary degradation.",
-                max_model_len,
-            )
+        max_model_len = current_platform.check_max_model_len(max_model_len)
+    # If the user specified a max length, make sure it is smaller than the
+    # derived length from the HF model config.
     elif max_model_len > derived_max_model_len:
         # Some models might have a separate key for specifying model_max_length
         # that will be bigger than derived_max_model_len. We compare user input
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 1fb3aba9b1f7..446282956439 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -608,6 +608,13 @@ class Platform:
         """
         return None
 
+    @classmethod
+    def check_max_model_len(cls, max_model_len: int) -> int:
+        """
+        Check and possibly adjust `max_model_len` for the current platform.
+        """
+        return max_model_len
+
 
 class UnspecifiedPlatform(Platform):
     _enum = PlatformEnum.UNSPECIFIED
diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py
index ab752f438f72..0a14ee011f7f 100644
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -251,6 +251,22 @@ class TpuPlatform(Platform):
     def use_sync_weight_loader(cls) -> bool:
         return True
 
+    @classmethod
+    def check_max_model_len(cls, max_model_len: int) -> int:
+        """
+        Warn when `max_model_len` falls back to the model's default length.
+        """
+        logger.warning(
+            "--max-model-len was not specified, "
+            "so the model's default length %d is being used, "
+            "which might be too large. "
+            "Please set --max-model-len based on your expected "
+            "request input and output lengths to avoid "
+            "unnecessary degradation.",
+            max_model_len,
+        )
+        return max_model_len
+
 
 try:
     from tpu_inference.platforms import TpuPlatform as TpuInferencePlatform
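
Note (reviewer sketch, not part of the patch): the new `check_max_model_len` hook lets a platform adjust the default derived from the model config rather than only warn about it, as TpuPlatform does above. Below is a minimal sketch of such an override; the `FooPlatform` name and the `_DEFAULT_LEN_CAP` value are hypothetical, and a real platform subclass would also define `_enum`, `device_name`, and the other `Platform` attributes.

import logging

from vllm.platforms.interface import Platform

logger = logging.getLogger(__name__)

# Assumed cap for illustration only; not a value taken from the patch.
_DEFAULT_LEN_CAP = 32768


class FooPlatform(Platform):
    @classmethod
    def check_max_model_len(cls, max_model_len: int) -> int:
        # Clamp the default derived from the model config instead of only
        # warning, so the engine does not default to an overly large context.
        if max_model_len > _DEFAULT_LEN_CAP:
            logger.warning(
                "--max-model-len was not specified; capping the model's "
                "default length %d to %d.",
                max_model_len,
                _DEFAULT_LEN_CAP,
            )
            return _DEFAULT_LEN_CAP
        return max_model_len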