mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 17:05:36 +08:00
[ci][distributed] fix flaky tests (#6806)
This commit is contained in:
parent
1adddb14bf
commit
443c7cf4cf
@ -1,3 +1,10 @@
|
|||||||
|
"""
|
||||||
|
WARNING: This test runs in both single-node (4 GPUs) and multi-node
|
||||||
|
(2 nodes with 2 GPUs each) modes. If the test only uses 2 GPUs, it is
|
||||||
|
important to set the distributed backend to "mp" to avoid Ray scheduling
|
||||||
|
all workers on a node other than the head node, which can cause the test
|
||||||
|
to fail.
|
||||||
|
"""
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@ -78,7 +85,7 @@ def test_pp_cudagraph(PP_SIZE, MODEL_NAME, ATTN_BACKEND):
|
|||||||
"--pipeline-parallel-size",
|
"--pipeline-parallel-size",
|
||||||
str(PP_SIZE),
|
str(PP_SIZE),
|
||||||
"--distributed-executor-backend",
|
"--distributed-executor-backend",
|
||||||
"ray",
|
"mp",
|
||||||
]
|
]
|
||||||
os.environ["VLLM_ATTENTION_BACKEND"] = ATTN_BACKEND
|
os.environ["VLLM_ATTENTION_BACKEND"] = ATTN_BACKEND
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user