# Buildkite pipeline group: distributed model tests that require 2 GPUs.
# NOTE(review): reconstructed from a scrape-mangled copy (gutter `|` lines and
# web-page chrome removed); key/value and command strings preserved verbatim.
group: Models - Distributed

# Run only after the CI Docker image has been built.
depends_on:
  - image-build

steps:
  - label: Distributed Model Tests (2 GPUs)
    timeout_in_minutes: 50
    working_dir: "/vllm-workspace/tests"
    num_gpus: 2
    # Changes to any of these paths trigger this step.
    source_file_dependencies:
      - vllm/model_executor/model_loader/sharded_state_loader.py
      - vllm/model_executor/models/
      - tests/basic_correctness/
      - tests/model_executor/model_loader/test_sharded_state_loader.py
      - tests/models/
    commands:
      - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)'
      - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py
      # Avoid importing model tests that cause CUDA reinitialization error
      - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)'
      - pytest models/language -v -s -m 'distributed(num_gpus=2)'
      - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py
      - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)'