From 5871f9dbe492ac21b5d1a9a31c886fb2bf7f1f2d Mon Sep 17 00:00:00 2001
From: yurekami <yurekami@users.noreply.github.com>
Date: Thu, 25 Dec 2025 02:11:07 +0900
Subject: [PATCH] Make EngineCore shutdown timeout configurable via environment
 variable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a `VLLM_ENGINE_SHUTDOWN_TIMEOUT` environment variable that configures
the engine core shutdown timeout, which was previously hard-coded to 5
seconds. Users whose engine cores need longer to clean up can now raise it.

Changes:
- Add `VLLM_ENGINE_SHUTDOWN_TIMEOUT` env var in `vllm/envs.py` (default: 5 seconds)
- Use the env var in `vllm/v1/utils.py:shutdown()` for process termination
- Use the env var in `vllm/v1/engine/core.py` for output thread join

Fixes #31252

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: yurekami <yurekami@users.noreply.github.com>
---
 vllm/envs.py           | 6 ++++++
 vllm/v1/engine/core.py | 3 ++-
 vllm/v1/utils.py       | 5 +++--
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/vllm/envs.py b/vllm/envs.py
index 1d4128d74b95c..bf58b438a5965 100755
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -847,6 +847,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
     "VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
         os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")
     ),
+    # Timeout in seconds for engine core shutdown. It bounds both the wait for
+    # engine core processes to terminate gracefully before force killing them
+    # and the wait for the engine core output thread to finish on shutdown.
+    "VLLM_ENGINE_SHUTDOWN_TIMEOUT": lambda: int(
+        os.environ.get("VLLM_ENGINE_SHUTDOWN_TIMEOUT", "5")
+    ),
     # a list of plugin names to load, separated by commas.
     # if this is not set, it means all plugins will be loaded
     # if this is set to an empty string, no plugins will be loaded
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
index 5f8883c164b3e..00e0287b23fed 100644
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -16,6 +16,7 @@ from typing import Any, TypeVar, cast
 import msgspec
 import zmq
 
+import vllm.envs as envs
 from vllm.config import ParallelConfig, VllmConfig
 from vllm.distributed import stateless_destroy_torch_distributed_process_group
 from vllm.envs import enable_envs_cache
@@ -1001,7 +1002,7 @@ class EngineCoreProc(EngineCore):
         self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD)
 
         # Wait until msg sent by the daemon before shutdown.
-        self.output_thread.join(timeout=5.0)
+        self.output_thread.join(timeout=envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT)
         if self.output_thread.is_alive():
             logger.fatal(
                 "vLLM shutdown signal from EngineCore failed "
diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py
index 29099d1e9b17e..648d2aefbe806 100644
--- a/vllm/v1/utils.py
+++ b/vllm/v1/utils.py
@@ -304,8 +304,9 @@ def shutdown(procs: list[BaseProcess]):
         if proc.is_alive():
             proc.terminate()
 
-    # Allow 5 seconds for remaining procs to terminate.
-    deadline = time.monotonic() + 5
+    # Allow VLLM_ENGINE_SHUTDOWN_TIMEOUT seconds for remaining procs to terminate.
+    timeout = envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT
+    deadline = time.monotonic() + timeout
     for proc in procs:
         remaining = deadline - time.monotonic()
         if remaining <= 0: