From 1e123529d7df1ff8f868b19aeced6a64e67bd618 Mon Sep 17 00:00:00 2001
From: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com>
Date: Sat, 31 May 2025 01:43:44 -0700
Subject: [PATCH] [Misc] Fix estimated max model len msg (#18966)

Signed-off-by: Yong Hoon Shin
---
 vllm/v1/core/kv_cache_utils.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 403b5401be75..a41fe4881870 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -544,16 +544,17 @@ def check_enough_kv_cache_memory(vllm_config: VllmConfig,
                                                    available_memory)
         estimated_msg = ""
         if estimated_max_len > 0:
-            estimated_msg = " Based on the available memory,"
-            f" the estimated maximum model length is {estimated_max_len}."
+            estimated_msg = (
+                "Based on the available memory, "
+                f"the estimated maximum model length is {estimated_max_len}.")
 
         raise ValueError(
             f"To serve at least one request with the models's max seq len "
             f"({max_model_len}), ({needed_memory/GiB_bytes:.2f} GiB KV "
             f"cache is needed, which is larger than the available KV cache "
-            f"memory ({available_memory/GiB_bytes:.2f} GiB)."
+            f"memory ({available_memory/GiB_bytes:.2f} GiB). "
             f"{estimated_msg} "
-            f" Try increasing `gpu_memory_utilization` or decreasing "
+            f"Try increasing `gpu_memory_utilization` or decreasing "
             f"`max_model_len` when initializing the engine.")
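
Illustration (not part of the patch): the underlying bug is that in the old code
the second f-string stood on its own line, outside the assignment. In Python,
adjacent string literals concatenate only within a single expression; a bare
string literal on its own line is a no-op expression statement, so half of the
message was silently dropped. A minimal standalone sketch of before and after,
using a made-up value for estimated_max_len:

    estimated_max_len = 4096  # hypothetical value for demonstration

    # Before the fix: the second line is evaluated and discarded.
    estimated_msg = " Based on the available memory,"
    f" the estimated maximum model length is {estimated_max_len}."
    print(repr(estimated_msg))
    # ' Based on the available memory,'

    # After the fix: parentheses make both literals part of one expression,
    # so implicit concatenation joins them into the full message.
    estimated_msg = (
        "Based on the available memory, "
        f"the estimated maximum model length is {estimated_max_len}.")
    print(repr(estimated_msg))
    # 'Based on the available memory, the estimated maximum model length is 4096.'

The remaining hunks only adjust spacing and punctuation so the assembled
ValueError message reads cleanly once estimated_msg is interpolated.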