From 2ad6194a0250932da7354e4d9dbebaf610f90202 Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Sat, 31 May 2025 23:41:29 -0400
Subject: [PATCH] Let max_num_batched_tokens use human_readable_int for large numbers (#18968)

Signed-off-by: mgoin
---
 vllm/engine/arg_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 13d8a280e53a..55553252630f 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -224,7 +224,7 @@ def get_kwargs(cls: ConfigType) -> dict[str, Any]:
         elif contains_type(type_hints, int):
             kwargs[name]["type"] = int
             # Special case for large integers
-            if name in {"max_model_len"}:
+            if name in {"max_model_len", "max_num_batched_tokens"}:
                 kwargs[name]["type"] = human_readable_int
         elif contains_type(type_hints, float):
             kwargs[name]["type"] = float