Make EngineCore shutdown timeout configurable via environment variable

Add a `VLLM_ENGINE_SHUTDOWN_TIMEOUT` environment variable that sets the
engine core shutdown timeout in whole seconds (the value is parsed as an
integer). Users whose cleanup needs more or less time than the previously
hard-coded 5 seconds can now adjust it.

Changes:
- Add `VLLM_ENGINE_SHUTDOWN_TIMEOUT` env var in `vllm/envs.py` (integer seconds, default: 5)
- Use the env var in `vllm/v1/utils.py:shutdown()` as the deadline for processes to exit after `terminate()`
- Use the env var in `vllm/v1/engine/core.py` as the timeout for the output thread join

Fixes #31252

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: yurekami <yurekami@users.noreply.github.com>
This commit is contained in:
yurekami 2025-12-25 02:11:07 +09:00
parent 5d93089686
commit 5871f9dbe4
3 changed files with 11 additions and 3 deletions

View File

@@ -847,6 +847,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")
),
# Timeout in seconds (integer) for engine core shutdown.
# This controls how long to wait for engine core processes to terminate
# gracefully before force killing them, and how long the engine core
# waits for its output thread to finish when shutting down.
"VLLM_ENGINE_SHUTDOWN_TIMEOUT": lambda: int(
os.environ.get("VLLM_ENGINE_SHUTDOWN_TIMEOUT", "5")
),
# a list of plugin names to load, separated by commas.
# if this is not set, it means all plugins will be loaded
# if this is set to an empty string, no plugins will be loaded

View File

@@ -16,6 +16,7 @@ from typing import Any, TypeVar, cast
import msgspec
import zmq
import vllm.envs as envs
from vllm.config import ParallelConfig, VllmConfig
from vllm.distributed import stateless_destroy_torch_distributed_process_group
from vllm.envs import enable_envs_cache
@@ -1001,7 +1002,7 @@ class EngineCoreProc(EngineCore):
self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD)
# Wait until msg sent by the daemon before shutdown.
self.output_thread.join(timeout=5.0)
self.output_thread.join(timeout=envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT)
if self.output_thread.is_alive():
logger.fatal(
"vLLM shutdown signal from EngineCore failed "

View File

@@ -304,8 +304,9 @@ def shutdown(procs: list[BaseProcess]):
if proc.is_alive():
proc.terminate()
# Allow 5 seconds for remaining procs to terminate.
deadline = time.monotonic() + 5
# Allow time for remaining procs to terminate.
timeout = envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT
deadline = time.monotonic() + timeout
for proc in procs:
remaining = deadline - time.monotonic()
if remaining <= 0: