From 5871f9dbe492ac21b5d1a9a31c886fb2bf7f1f2d Mon Sep 17 00:00:00 2001 From: yurekami Date: Thu, 25 Dec 2025 02:11:07 +0900 Subject: [PATCH] Make EngineCore shutdown timeout configurable via environment variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `VLLM_ENGINE_SHUTDOWN_TIMEOUT` environment variable to configure the timeout for engine core shutdown. This allows users to adjust the timeout based on their cleanup requirements. Changes: - Add `VLLM_ENGINE_SHUTDOWN_TIMEOUT` env var in `vllm/envs.py` (default: 5 seconds) - Use the env var in `vllm/v1/utils.py:shutdown()` for process termination - Use the env var in `vllm/v1/engine/core.py` for output thread join Fixes #31252 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 Signed-off-by: yurekami --- vllm/envs.py | 6 ++++++ vllm/v1/engine/core.py | 3 ++- vllm/v1/utils.py | 5 +++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 1d4128d74b95c..bf58b438a5965 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -847,6 +847,12 @@ environment_variables: dict[str, Callable[[], Any]] = { "VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int( os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5") ), + # Timeout in seconds for engine core shutdown. + # This controls how long to wait for engine core processes to terminate + # gracefully before force killing them. + "VLLM_ENGINE_SHUTDOWN_TIMEOUT": lambda: int( + os.environ.get("VLLM_ENGINE_SHUTDOWN_TIMEOUT", "5") + ), # a list of plugin names to load, separated by commas. # if this is not set, it means all plugins will be loaded # if this is set to an empty string, no plugins will be loaded diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 5f8883c164b3e..00e0287b23fed 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -16,6 +16,7 @@ from typing import Any, TypeVar, cast import msgspec import zmq +import vllm.envs as envs from vllm.config import ParallelConfig, VllmConfig from vllm.distributed import stateless_destroy_torch_distributed_process_group from vllm.envs import enable_envs_cache @@ -1001,7 +1002,7 @@ class EngineCoreProc(EngineCore): self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD) # Wait until msg sent by the daemon before shutdown. - self.output_thread.join(timeout=5.0) + self.output_thread.join(timeout=envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT) if self.output_thread.is_alive(): logger.fatal( "vLLM shutdown signal from EngineCore failed " diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py index 29099d1e9b17e..648d2aefbe806 100644 --- a/vllm/v1/utils.py +++ b/vllm/v1/utils.py @@ -304,8 +304,9 @@ def shutdown(procs: list[BaseProcess]): if proc.is_alive(): proc.terminate() - # Allow 5 seconds for remaining procs to terminate. - deadline = time.monotonic() + 5 + # Allow time for remaining procs to terminate. + timeout = envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT + deadline = time.monotonic() + timeout for proc in procs: remaining = deadline - time.monotonic() if remaining <= 0: