[CI/Build] Remove V0 LoRA test (#19066)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Author: Jee Jee Li
Date: 2025-06-03 22:30:18 +08:00 (committed by GitHub)
Parent: 4e88723f32
Commit: 4e68ae5e59
8 changed files with 10 additions and 97 deletions

View File

@@ -6,6 +6,8 @@ import pytest

 import vllm.envs as env
 from vllm.engine.arg_utils import AsyncEngineArgs
+from vllm.entrypoints.openai.api_server import (
+    build_async_engine_client_from_engine_args)
 from vllm.inputs import TextPrompt
 from vllm.lora.request import LoRARequest
 from vllm.sampling_params import SamplingParams
@@ -16,14 +18,6 @@ LORA_RANK = 64
 DEFAULT_MAX_LORAS = 4 * 3


-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def get_lora_requests(lora_path) -> list[LoRARequest]:
     lora_requests: list[LoRARequest] = [
         LoRARequest(lora_name=f"{i}", lora_int_id=i, lora_path=lora_path)
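For context, every deleted `v1` wrapper above (and in the hunks below) delegated to a shared `run_with_both_engines_lora` fixture that re-ran each test under both engine versions by toggling `VLLM_USE_V1`. A minimal sketch of that pattern, as an assumption about the conftest helper rather than its verbatim implementation:

import pytest


@pytest.fixture(params=["0", "1"], ids=["v0-engine", "v1-engine"])
def run_with_both_engines_lora(request, monkeypatch):
    # Parametrize the requesting test over the engine selector env var so the
    # same test body runs once against the V0 engine and once against V1.
    monkeypatch.setenv("VLLM_USE_V1", request.param)
    yield

With V0 support dropped from these tests, the parametrization no longer buys anything, so each per-file autouse wrapper is simply removed.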
@@ -88,17 +82,6 @@ async def test_add_lora(chatglm3_lora_files):
                                   trust_remote_code=True,
                                   enforce_eager=True)

-    # The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
-    # environment variable. reload vllm.enging.async_llm_engine as
-    # vllm.engine.async_llm_engine.AsyncLLMEgnine changes depending on the
-    # env var.
-    import importlib
-
-    import vllm.engine.async_llm_engine
-    importlib.reload(vllm.engine.async_llm_engine)
-    from vllm.entrypoints.openai.api_server import (
-        build_async_engine_client_from_engine_args)
-
     # split lora_requests into 3 parts
     part_size = len(lora_requests) // 3
     dummy_run_requests = lora_requests[:part_size]
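With the V0 path gone there is no longer an engine module whose contents depend on `VLLM_USE_V1` at import time, so the `importlib.reload` dance above is deleted and `build_async_engine_client_from_engine_args` is imported once at the top of the file (first hunk). A minimal sketch of the resulting usage, assuming the helper's usual async-context-manager shape and using hypothetical model and limit values:

import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
    build_async_engine_client_from_engine_args)
from vllm.lora.request import LoRARequest


async def register_one_lora(lora_path: str) -> None:
    engine_args = AsyncEngineArgs(model="some/base-model",  # hypothetical
                                  enable_lora=True,
                                  max_loras=4,
                                  max_lora_rank=64,
                                  enforce_eager=True)
    # The helper yields an engine client; add_lora registers an adapter on it.
    async with build_async_engine_client_from_engine_args(engine_args) as client:
        await client.add_lora(
            LoRARequest(lora_name="1", lora_int_id=1, lora_path=lora_path))


if __name__ == "__main__":
    asyncio.run(register_one_lora("/path/to/adapter"))  # hypothetical path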

View File

@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0

-import pytest
-
 import vllm
 from vllm.lora.request import LoRARequest
@@ -18,14 +16,6 @@ EXPECTED_LORA_OUTPUT = [
 ]


-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
     prompts = [
         PROMPT_TEMPLATE.format(query="How many singers do we have?"),

View File

@@ -33,14 +33,6 @@ EXPECTED_LORA_OUTPUT = [
 ]


-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def do_sample(llm: vllm.LLM,
               lora_path: str,
               lora_id: int,

View File

@@ -2,26 +2,24 @@
 """
 Script to test add_lora, remove_lora, pin_lora, list_loras functions.
 """
-import os

 import pytest

 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.engine.llm_engine import LLMEngine
+from vllm.entrypoints.openai.api_server import (
+    build_async_engine_client_from_engine_args)
 from vllm.lora.request import LoRARequest

 MODEL_PATH = "meta-llama/Llama-2-7b-hf"
 LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test"
 LORA_RANK = 8

-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
+# @pytest.fixture(autouse=True)
+# def v1(run_with_both_engines_lora):
+#     # Simple autouse wrapper to run both engines for each test
+#     # This can be promoted up to conftest.py to run for every
+#     # test in a package
+#     pass


 def make_lora_request(lora_id: int):
@@ -79,22 +77,6 @@ def test_lora_functions_sync():

 @pytest.mark.asyncio
 async def test_lora_functions_async():
-    if os.getenv("VLLM_USE_V1") == "0":
-        pytest.skip(
-            reason=
-            "V0 AsyncLLMEngine does not expose remove/list/pin LoRA functions")
-
-    # The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
-    # environment variable. reload vllm.enging.async_llm_engine as
-    # vllm.engine.async_llm_engine.AsyncLLMEgnine changes depending on the
-    # env var.
-    import importlib
-
-    import vllm.engine.async_llm_engine
-    importlib.reload(vllm.engine.async_llm_engine)
-    from vllm.entrypoints.openai.api_server import (
-        build_async_engine_client_from_engine_args)
-
     max_loras = 4
     engine_args = AsyncEngineArgs(model=MODEL_PATH,
                                   enable_lora=True,
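This file's docstring names the four management calls under test. A hedged sketch of the synchronous flow, assuming `LLMEngine` exposes `add_lora`/`list_loras`/`pin_lora`/`remove_lora` as the docstring states, that `list_loras` reports adapters by their integer id, and an assumed shape for the file's `make_lora_request` helper (its body is not shown in the diff):

from vllm.engine.arg_utils import EngineArgs
from vllm.engine.llm_engine import LLMEngine
from vllm.lora.request import LoRARequest

MODEL_PATH = "meta-llama/Llama-2-7b-hf"
LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test"


def make_lora_request(lora_id: int) -> LoRARequest:
    # Assumed shape of the file's make_lora_request helper.
    return LoRARequest(lora_name=f"lora_{lora_id}",
                       lora_int_id=lora_id,
                       lora_path=LORA_MODULE_PATH)


def exercise_lora_functions() -> None:
    engine = LLMEngine.from_engine_args(
        EngineArgs(model=MODEL_PATH,
                   enable_lora=True,
                   max_loras=4,
                   max_lora_rank=8))  # LORA_RANK in the test file

    engine.add_lora(make_lora_request(1))
    engine.add_lora(make_lora_request(2))
    assert 1 in engine.list_loras()

    engine.pin_lora(1)       # keep adapter 1 resident
    engine.remove_lora(2)    # unload adapter 2
    assert 2 not in engine.list_loras()

The async test whose V0 skip is removed in the hunk above follows the same shape, except the calls are awaited on the client yielded by `build_async_engine_client_from_engine_args`.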

View File

@@ -10,14 +10,6 @@ from vllm.platforms import current_platform
 MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1"


-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int,
               prompts: list[str]) -> list[str]:

View File

@@ -37,14 +37,6 @@ else:
 ]


-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def do_sample(llm: vllm.LLM,
               lora_path: str,
               lora_id: int,

View File

@@ -13,14 +13,6 @@ from vllm.platforms import current_platform
 from vllm.sampling_params import BeamSearchParams


-@pytest.fixture(autouse=not current_platform.is_cpu())
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 @dataclass
 class TestConfig:
     model_path: str

View File

@@ -6,8 +6,6 @@ import tempfile
 from typing import Union
 from unittest.mock import patch

-import pytest
-
 import vllm.envs as envs
 from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                          ModelConfig, ParallelConfig, SchedulerConfig,
@@ -18,14 +16,6 @@ from vllm.v1.worker.gpu_worker import Worker as V1Worker
 from vllm.worker.worker import Worker


-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 @patch.dict(os.environ, {"RANK": "0"})
 def test_worker_apply_lora(sql_lora_files):