diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 5d03390335a4e..730f272b54e7c 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -136,6 +136,10 @@ steps: - examples/offline_inference/rlhf_colocate.py - tests/examples/offline_inference/data_parallel.py commands: + # test with tp=2 and external_dp=2 + - VLLM_USE_V1=0 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py + - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py + # test with internal dp - python3 ../examples/offline_inference/data_parallel.py - pytest -v -s distributed/test_utils.py - pytest -v -s compile/test_basic_correctness.py @@ -512,8 +516,6 @@ steps: - entrypoints/llm/test_collective_rpc.py commands: - pytest -v -s entrypoints/llm/test_collective_rpc.py - - VLLM_USE_V1=1 torchrun --nproc-per-node=2 distributed/test_torchrun_example.py - - torchrun --nproc-per-node=2 distributed/test_torchrun_example.py - pytest -v -s ./compile/test_basic_correctness.py - pytest -v -s ./compile/test_wrapper.py - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' diff --git a/tests/distributed/test_torchrun_example.py b/tests/distributed/test_torchrun_example.py index 4ef33932538ef..0420a6454d461 100644 --- a/tests/distributed/test_torchrun_example.py +++ b/tests/distributed/test_torchrun_example.py @@ -9,6 +9,8 @@ import torch.distributed as dist from vllm import LLM, SamplingParams from vllm.distributed.parallel_state import get_world_group +dist.init_process_group(backend="gloo") + # Create prompts prompts = [ "Hello, my name is",