Make EngineCore shutdown timeout configurable via environment variable

Add the `VLLM_ENGINE_SHUTDOWN_TIMEOUT` environment variable to configure
the timeout for engine core shutdown, which was previously hard-coded to
5 seconds. This allows users to adjust the timeout based on their cleanup
requirements.

Changes:
- Add `VLLM_ENGINE_SHUTDOWN_TIMEOUT` env var in `vllm/envs.py` (default: 5 seconds)
- Use the env var in `vllm/v1/utils.py:shutdown()` as the deadline when waiting for processes to terminate
- Use the env var in `vllm/v1/engine/core.py` as the timeout when joining the output thread

Fixes #31252

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Signed-off-by: yurekami <yurekami@users.noreply.github.com>
This commit is contained in:
yurekami 2025-12-25 02:11:07 +09:00
parent 5d93089686
commit 5871f9dbe4
3 changed files with 11 additions and 3 deletions

View File

@ -847,6 +847,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int( "VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5") os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")
), ),
# Timeout in seconds for engine core shutdown.
# This controls how long to wait for engine core processes to terminate
# gracefully before force killing them, and how long the engine core
# waits for its output thread to finish before shutting down.
"VLLM_ENGINE_SHUTDOWN_TIMEOUT": lambda: int(
os.environ.get("VLLM_ENGINE_SHUTDOWN_TIMEOUT", "5")
),
# a list of plugin names to load, separated by commas. # a list of plugin names to load, separated by commas.
# if this is not set, it means all plugins will be loaded # if this is not set, it means all plugins will be loaded
# if this is set to an empty string, no plugins will be loaded # if this is set to an empty string, no plugins will be loaded

View File

@ -16,6 +16,7 @@ from typing import Any, TypeVar, cast
import msgspec import msgspec
import zmq import zmq
import vllm.envs as envs
from vllm.config import ParallelConfig, VllmConfig from vllm.config import ParallelConfig, VllmConfig
from vllm.distributed import stateless_destroy_torch_distributed_process_group from vllm.distributed import stateless_destroy_torch_distributed_process_group
from vllm.envs import enable_envs_cache from vllm.envs import enable_envs_cache
@ -1001,7 +1002,7 @@ class EngineCoreProc(EngineCore):
self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD) self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD)
# Wait until msg sent by the daemon before shutdown. # Wait until msg sent by the daemon before shutdown.
self.output_thread.join(timeout=5.0) self.output_thread.join(timeout=envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT)
if self.output_thread.is_alive(): if self.output_thread.is_alive():
logger.fatal( logger.fatal(
"vLLM shutdown signal from EngineCore failed " "vLLM shutdown signal from EngineCore failed "

View File

@ -304,8 +304,9 @@ def shutdown(procs: list[BaseProcess]):
if proc.is_alive(): if proc.is_alive():
proc.terminate() proc.terminate()
# Allow 5 seconds for remaining procs to terminate. # Allow time for remaining procs to terminate.
deadline = time.monotonic() + 5 timeout = envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT
deadline = time.monotonic() + timeout
for proc in procs: for proc in procs:
remaining = deadline - time.monotonic() remaining = deadline - time.monotonic()
if remaining <= 0: if remaining <= 0: