[CI/Build] Further clean up LoRA tests (#15920)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

parent cdb57015a7
commit 4203926f10
.buildkite/test-pipeline.yaml
@@ -289,7 +289,7 @@ steps:
   source_file_dependencies:
   - vllm/lora
   - tests/lora
-  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py --ignore=lora/test_minicpmv_tp.py --ignore=lora/test_transfomers_model.py
+  command: pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py
   parallelism: 4
 
 - label: PyTorch Fullgraph Smoke Test # 9min
@@ -602,8 +602,6 @@ steps:
     # requires multi-GPU testing for validation.
     - pytest -v -s -x lora/test_chatglm3_tp.py
     - pytest -v -s -x lora/test_llama_tp.py
-    - pytest -v -s -x lora/test_minicpmv_tp.py
-    - pytest -v -s -x lora/test_transfomers_model.py
 
 
 - label: Weight Loading Multiple GPU Test # 33min
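A note on the sharded command above: `--shard-id`/`--num-shards` come from the pytest-shard plugin, fed by Buildkite's parallel-job environment variables (the `$$` escapes them in the pipeline YAML, and `parallelism: 4` spawns the four jobs). A minimal sketch of the contract, assuming simple index-modulo assignment (the plugin's actual distribution strategy may differ):

# Sketch: each of N parallel jobs collects the same test list and
# keeps a disjoint slice, so every test runs exactly once overall.
def select_shard(items: list[str], shard_id: int, num_shards: int) -> list[str]:
    assert 0 <= shard_id < num_shards
    return [item for i, item in enumerate(items) if i % num_shards == shard_id]

tests = [f"lora/test_{i}.py" for i in range(10)]
shards = [select_shard(tests, s, 4) for s in range(4)]  # parallelism: 4
assert sorted(t for shard in shards for t in shard) == sorted(tests)

The four TP tests are `--ignore`d here (and, after this commit, two of them dropped entirely) because single-GPU sharded jobs cannot exercise them; the tensor-parallel variants run in the dedicated multi-GPU step below.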
tests/lora/conftest.py
@@ -2,7 +2,6 @@
 
 import tempfile
 from collections import OrderedDict
-from typing import TypedDict
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -26,28 +25,6 @@ from vllm.model_executor.models.interfaces import SupportsLoRA
 from vllm.platforms import current_platform
 
 
-class ContextIDInfo(TypedDict):
-    lora_id: int
-    context_length: str
-
-
-class ContextInfo(TypedDict):
-    lora: str
-    context_length: str
-
-
-LONG_LORA_INFOS: list[ContextIDInfo] = [{
-    "lora_id": 1,
-    "context_length": "16k",
-}, {
-    "lora_id": 2,
-    "context_length": "16k",
-}, {
-    "lora_id": 3,
-    "context_length": "32k",
-}]
-
-
 @pytest.fixture()
 def should_do_global_cleanup_after_test(request) -> bool:
     """Allow subdirectories to skip global cleanup by overriding this fixture.
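The `should_do_global_cleanup_after_test` fixture that survives this hunk is an override hook: the parent conftest defines the default, an autouse fixture consults it, and a subdirectory's conftest can redefine it to opt out. A minimal sketch of the pattern, with a hypothetical `cleanup()` standing in for vllm's real teardown logic:

import pytest

def cleanup():
    # hypothetical stand-in for vllm's real GPU/distributed teardown
    pass

@pytest.fixture()
def should_do_global_cleanup_after_test(request) -> bool:
    return True  # default: clean up after every test

@pytest.fixture(autouse=True)
def cleanup_fixture(should_do_global_cleanup_after_test):
    yield
    if should_do_global_cleanup_after_test:
        cleanup()

# A subdirectory's conftest.py opts out by redefining the hook:
# @pytest.fixture()
# def should_do_global_cleanup_after_test(request) -> bool:
#     return False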
tests/lora/test_layers.py
@@ -59,7 +59,7 @@ DEVICES = ([
 # prefill stage(True) or decode stage(False)
 STAGES = [True, False]
 
-NUM_RANDOM_SEEDS = 10
+NUM_RANDOM_SEEDS = 6
 
 VOCAB_PARALLEL_EMBEDDING_TEST_NUM_RANDOM_SEEDS = 128
 
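Dropping NUM_RANDOM_SEEDS from 10 to 6 directly shrinks the parametrize grid of the randomized layer tests, which is where this commit buys most of its CI time. A sketch of the typical consumption pattern (assumed, not vllm's exact code; the real tests seed torch and exercise LoRA layers):

import random
import pytest

NUM_RANDOM_SEEDS = 6  # was 10; each seed multiplies the test matrix

@pytest.mark.parametrize("seed", range(NUM_RANDOM_SEEDS))
def test_randomized_layer(seed: int) -> None:
    random.seed(seed)  # the real tests use torch.manual_seed(seed)
    values = [random.random() for _ in range(8)]
    assert all(0.0 <= v < 1.0 for v in values)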
tests/lora/test_llama_tp.py
@@ -153,20 +153,3 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files):
         enable_chunked_prefill=True,
     )
     generate_and_test(llm, sql_lora_files)
-
-
-@multi_gpu_test(num_gpus=4)
-@create_new_process_for_each_test()
-def test_llama_lora_tp4_fully_sharded_enable_bias(sql_lora_files):
-
-    llm = vllm.LLM(
-        MODEL_PATH,
-        enable_lora=True,
-        max_num_seqs=16,
-        max_loras=4,
-        tensor_parallel_size=4,
-        fully_sharded_loras=True,
-        enable_lora_bias=True,
-        enable_chunked_prefill=True,
-    )
-    generate_and_test(llm, sql_lora_files)
tests/lora/test_minicpmv_tp.py
@@ -58,7 +58,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
 @pytest.mark.xfail(
     current_platform.is_rocm(),
     reason="MiniCPM-V dependency xformers incompatible with ROCm")
-@create_new_process_for_each_test()
 def test_minicpmv_lora(minicpmv_lora_files):
     llm = vllm.LLM(
         MODEL_PATH,
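`@create_new_process_for_each_test()` (removed here) isolates a test in a freshly spawned process so CUDA and distributed state cannot leak between tests; dropping it trades that isolation for less per-test startup overhead. An illustrative sketch of the decorator pattern, not vllm's actual implementation:

import functools
import multiprocessing

def create_new_process_for_each_test():
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            # "spawn" gives a clean interpreter with no inherited CUDA
            # context; fn and its arguments must be picklable for this.
            ctx = multiprocessing.get_context("spawn")
            proc = ctx.Process(target=fn, args=args, kwargs=kwargs)
            proc.start()
            proc.join()
            assert proc.exitcode == 0, f"{fn.__name__} failed in subprocess"
        return wrapper
    return decorator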
tests/lora/test_transfomers_model.py
@@ -1,7 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import pytest
+
 import vllm
 from vllm.lora.request import LoRARequest
+from vllm.platforms import current_platform
 
 from ..utils import create_new_process_for_each_test, multi_gpu_test
 
@@ -44,7 +47,6 @@ def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
     return generated_texts
 
 
-@create_new_process_for_each_test()
 def test_ilama_lora(ilama_lora_files):
     llm = vllm.LLM(MODEL_PATH,
                    max_model_len=1024,
@@ -63,6 +65,8 @@ def test_ilama_lora(ilama_lora_files):
         assert output2[i] == EXPECTED_LORA_OUTPUT[i]
 
 
+@pytest.mark.skipif(current_platform.is_cuda_alike(),
+                    reason="Skipping to avoid redundant model tests")
 @multi_gpu_test(num_gpus=4)
 @create_new_process_for_each_test()
 def test_ilama_lora_tp4(ilama_lora_files):
@@ -84,6 +88,8 @@ def test_ilama_lora_tp4(ilama_lora_files):
         assert output2[i] == EXPECTED_LORA_OUTPUT[i]
 
 
+@pytest.mark.skipif(current_platform.is_cuda_alike(),
+                    reason="Skipping to avoid redundant model tests")
 @multi_gpu_test(num_gpus=4)
 @create_new_process_for_each_test()
 def test_ilama_lora_tp4_fully_sharded_loras(ilama_lora_files):
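The new guard skips these ilama TP tests on CUDA-like platforms, presumably because (per the reason string) equivalent model coverage already runs elsewhere in the suite. Since the same two-line marker is now repeated on both tests, it could be hoisted into a shared constant; a sketch of that refactor, with the hypothetical name `skip_on_cuda_alike`:

import pytest
from vllm.platforms import current_platform

# Hypothetical shared marker; the diff inlines this on each test instead.
skip_on_cuda_alike = pytest.mark.skipif(
    current_platform.is_cuda_alike(),
    reason="Skipping to avoid redundant model tests")

@skip_on_cuda_alike
def test_example():
    ...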