[Misc] Fix estimated max model len msg (#18966)

Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
2025-12-14 07:25:01 +08:00 · 2025-05-31 01:43:44 -07:00 · 2025-05-31 01:43:44 -07:00 · 1e123529d7
commit 1e123529d7
parent dff80b0e42
1 changed file with 5 additions and 4 deletions
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -544,8 +544,9 @@ def check_enough_kv_cache_memory(vllm_config: VllmConfig,
                                                   available_memory)
        estimated_msg = ""
        if estimated_max_len > 0:
-            estimated_msg = " Based on the available memory,"
+            estimated_msg = (
-            f" the estimated maximum model length is {estimated_max_len}."
+                "Based on the available memory, "
                f"the estimated maximum model length is {estimated_max_len}.")
        raise ValueError(
            f"To serve at least one request with the models's max seq len "