Merge 5871f9dbe492ac21b5d1a9a31c886fb2bf7f1f2d into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

2026-05-03 08:31:24 +08:00 · 2025-12-25 00:07:13 +00:00 · 2025-12-25 00:07:13 +00:00 · acfb39b842
commit acfb39b842
parent 254f6b9867 5871f9dbe4
3 changed files with 11 additions and 3 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -847,6 +847,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
        os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")
    ),
+    # Timeout in seconds for engine core shutdown.
+    # This controls how long to wait for engine core processes to terminate
+    # gracefully before force killing them.
+    "VLLM_ENGINE_SHUTDOWN_TIMEOUT": lambda: int(
+        os.environ.get("VLLM_ENGINE_SHUTDOWN_TIMEOUT", "5")
+    ),
    # a list of plugin names to load, separated by commas.
    # if this is not set, it means all plugins will be loaded
    # if this is set to an empty string, no plugins will be loaded
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -16,6 +16,7 @@ from typing import Any, TypeVar, cast
 import msgspec
 import zmq

+import vllm.envs as envs
 from vllm.config import ParallelConfig, VllmConfig
 from vllm.distributed import stateless_destroy_torch_distributed_process_group
 from vllm.envs import enable_envs_cache
@@ -1001,7 +1002,7 @@ class EngineCoreProc(EngineCore):
        self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD)

        # Wait until msg sent by the daemon before shutdown.
-        self.output_thread.join(timeout=5.0)
+        self.output_thread.join(timeout=envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT)
        if self.output_thread.is_alive():
            logger.fatal(
                "vLLM shutdown signal from EngineCore failed "
--- a/vllm/v1/utils.py
+++ b/vllm/v1/utils.py
@@ -304,8 +304,9 @@ def shutdown(procs: list[BaseProcess]):
        if proc.is_alive():
            proc.terminate()

-    # Allow 5 seconds for remaining procs to terminate.
-    deadline = time.monotonic() + 5
+    # Allow time for remaining procs to terminate.
+    timeout = envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT
+    deadline = time.monotonic() + timeout
    for proc in procs:
        remaining = deadline - time.monotonic()
        if remaining <= 0: