Merge 5871f9dbe492ac21b5d1a9a31c886fb2bf7f1f2d into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

This commit is contained in:
ゆり 2025-12-25 00:07:13 +00:00 committed by GitHub
commit acfb39b842
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 11 additions and 3 deletions

View File

@ -847,6 +847,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int( "VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5") os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")
), ),
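# Timeout in seconds for engine core shutdown (whole seconds; default 5).
# This controls how long to wait for engine core processes to terminate
# gracefully before force killing them, and how long the engine core
# waits for its output thread to finish sending before shutting down.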
"VLLM_ENGINE_SHUTDOWN_TIMEOUT": lambda: int(
os.environ.get("VLLM_ENGINE_SHUTDOWN_TIMEOUT", "5")
),
# a list of plugin names to load, separated by commas. # a list of plugin names to load, separated by commas.
# if this is not set, it means all plugins will be loaded # if this is not set, it means all plugins will be loaded
# if this is set to an empty string, no plugins will be loaded # if this is set to an empty string, no plugins will be loaded

View File

@ -16,6 +16,7 @@ from typing import Any, TypeVar, cast
import msgspec import msgspec
import zmq import zmq
import vllm.envs as envs
from vllm.config import ParallelConfig, VllmConfig from vllm.config import ParallelConfig, VllmConfig
from vllm.distributed import stateless_destroy_torch_distributed_process_group from vllm.distributed import stateless_destroy_torch_distributed_process_group
from vllm.envs import enable_envs_cache from vllm.envs import enable_envs_cache
@ -1001,7 +1002,7 @@ class EngineCoreProc(EngineCore):
self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD) self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD)
# Wait until msg sent by the daemon before shutdown. # Wait until msg sent by the daemon before shutdown.
self.output_thread.join(timeout=5.0) self.output_thread.join(timeout=envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT)
if self.output_thread.is_alive(): if self.output_thread.is_alive():
logger.fatal( logger.fatal(
"vLLM shutdown signal from EngineCore failed " "vLLM shutdown signal from EngineCore failed "

View File

@ -304,8 +304,9 @@ def shutdown(procs: list[BaseProcess]):
if proc.is_alive(): if proc.is_alive():
proc.terminate() proc.terminate()
# Allow 5 seconds for remaining procs to terminate. # Allow time for remaining procs to terminate.
deadline = time.monotonic() + 5 timeout = envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT
deadline = time.monotonic() + timeout
for proc in procs: for proc in procs:
remaining = deadline - time.monotonic() remaining = deadline - time.monotonic()
if remaining <= 0: if remaining <= 0: