mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-29 10:14:01 +08:00
[Misc] Increase RayDistributedExecutor RAY_CGRAPH_get_timeout (#15301)
Signed-off-by: Rui Qiao <ruisearch42@gmail.com>
This commit is contained in:
parent
790b79750b
commit
4c69e228b3
@@ -561,6 +561,15 @@ class RayDistributedExecutor(DistributedExecutorBase):
|
||||
envs.VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL)
|
||||
logger.info("VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM = %s",
|
||||
envs.VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM)
|
||||
# Enlarge the default value of "RAY_CGRAPH_get_timeout" to 300 seconds
|
||||
# (it is 10 seconds by default). This is a Ray environment variable to
|
||||
# control the timeout for getting the result of a compiled graph execution,
|
||||
# i.e., the distributed execution that includes model forward runs and
|
||||
# intermediate tensor communications, in the case of vLLM.
|
||||
os.environ.setdefault("RAY_CGRAPH_get_timeout", "300") # noqa: SIM112
|
||||
logger.info("RAY_CGRAPH_get_timeout is set to %s",
|
||||
os.environ["RAY_CGRAPH_get_timeout"]) # noqa: SIM112
|
||||
|
||||
with InputNode() as input_data:
|
||||
# Example DAG: PP=2, TP=4
|
||||
#
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user