mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-25 06:44:25 +08:00
Merge 5871f9dbe492ac21b5d1a9a31c886fb2bf7f1f2d into 254f6b986720c92ddf97fbb1a6a6465da8e87e29
This commit is contained in:
commit
acfb39b842
@ -847,6 +847,12 @@ environment_variables: dict[str, Callable[[], Any]] = {
|
|||||||
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
|
"VLLM_HTTP_TIMEOUT_KEEP_ALIVE": lambda: int(
|
||||||
os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")
|
os.environ.get("VLLM_HTTP_TIMEOUT_KEEP_ALIVE", "5")
|
||||||
),
|
),
|
||||||
|
# Timeout in seconds for engine core shutdown.
|
||||||
|
# This controls how long to wait for engine core processes to terminate
|
||||||
|
# gracefully before force killing them.
|
||||||
|
"VLLM_ENGINE_SHUTDOWN_TIMEOUT": lambda: int(
|
||||||
|
os.environ.get("VLLM_ENGINE_SHUTDOWN_TIMEOUT", "5")
|
||||||
|
),
|
||||||
# a list of plugin names to load, separated by commas.
|
# a list of plugin names to load, separated by commas.
|
||||||
# if this is not set, it means all plugins will be loaded
|
# if this is not set, it means all plugins will be loaded
|
||||||
# if this is set to an empty string, no plugins will be loaded
|
# if this is set to an empty string, no plugins will be loaded
|
||||||
|
|||||||
@ -16,6 +16,7 @@ from typing import Any, TypeVar, cast
|
|||||||
import msgspec
|
import msgspec
|
||||||
import zmq
|
import zmq
|
||||||
|
|
||||||
|
import vllm.envs as envs
|
||||||
from vllm.config import ParallelConfig, VllmConfig
|
from vllm.config import ParallelConfig, VllmConfig
|
||||||
from vllm.distributed import stateless_destroy_torch_distributed_process_group
|
from vllm.distributed import stateless_destroy_torch_distributed_process_group
|
||||||
from vllm.envs import enable_envs_cache
|
from vllm.envs import enable_envs_cache
|
||||||
@ -1001,7 +1002,7 @@ class EngineCoreProc(EngineCore):
|
|||||||
self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD)
|
self.output_queue.put_nowait(EngineCoreProc.ENGINE_CORE_DEAD)
|
||||||
|
|
||||||
# Wait until msg sent by the daemon before shutdown.
|
# Wait until msg sent by the daemon before shutdown.
|
||||||
self.output_thread.join(timeout=5.0)
|
self.output_thread.join(timeout=envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT)
|
||||||
if self.output_thread.is_alive():
|
if self.output_thread.is_alive():
|
||||||
logger.fatal(
|
logger.fatal(
|
||||||
"vLLM shutdown signal from EngineCore failed "
|
"vLLM shutdown signal from EngineCore failed "
|
||||||
|
|||||||
@ -304,8 +304,9 @@ def shutdown(procs: list[BaseProcess]):
|
|||||||
if proc.is_alive():
|
if proc.is_alive():
|
||||||
proc.terminate()
|
proc.terminate()
|
||||||
|
|
||||||
# Allow 5 seconds for remaining procs to terminate.
|
# Allow up to VLLM_ENGINE_SHUTDOWN_TIMEOUT seconds for remaining procs to terminate.
|
||||||
deadline = time.monotonic() + 5
|
timeout = envs.VLLM_ENGINE_SHUTDOWN_TIMEOUT
|
||||||
|
deadline = time.monotonic() + timeout
|
||||||
for proc in procs:
|
for proc in procs:
|
||||||
remaining = deadline - time.monotonic()
|
remaining = deadline - time.monotonic()
|
||||||
if remaining <= 0:
|
if remaining <= 0:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user