From 2e4fe48c370e833350eae092eddd1490b65ff529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Mon, 29 Sep 2025 23:35:14 +0200 Subject: [PATCH] [NIXL] Increase default KV block eviction timeout on P (#25897) Signed-off-by: NickLucche --- docs/features/nixl_connector_usage.md | 2 +- vllm/envs.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/features/nixl_connector_usage.md b/docs/features/nixl_connector_usage.md index afecbc82947b..5e273af05dc5 100644 --- a/docs/features/nixl_connector_usage.md +++ b/docs/features/nixl_connector_usage.md @@ -84,7 +84,7 @@ python tests/v1/kv_connector/nixl_integration/toy_proxy_server.py \ - Connection info is passed via KVTransferParams from prefiller to decoder for handshake - `VLLM_NIXL_ABORT_REQUEST_TIMEOUT`: Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. (Optional) - - Default: 120 + - Default: 480 - If a request is aborted and the decoder has not yet read the KV-cache blocks through the nixl channel, the prefill instance will release its KV-cache blocks after this timeout to avoid holding them indefinitely. ## Multi-Instance Setup diff --git a/vllm/envs.py b/vllm/envs.py index f06c860b8297..ffa7ed5c3aa5 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -174,7 +174,7 @@ if TYPE_CHECKING: "NONE"] = "NONE" VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16: bool = True VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB: Optional[int] = None - VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120 + VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 480 VLLM_USE_CUDNN_PREFILL: bool = False VLLM_ENABLE_CUDAGRAPH_GC: bool = False VLLM_LOOPBACK_IP: str = "" @@ -1330,7 +1330,7 @@ environment_variables: dict[str, Callable[[], Any]] = { # consumer. This is only applicable when using NixlConnector in a # disaggregated decode-prefill setup. "VLLM_NIXL_ABORT_REQUEST_TIMEOUT": - lambda: int(os.getenv("VLLM_NIXL_ABORT_REQUEST_TIMEOUT", "120")), + lambda: int(os.getenv("VLLM_NIXL_ABORT_REQUEST_TIMEOUT", "480")), # Controls whether or not to use cudnn prefill "VLLM_USE_CUDNN_PREFILL":