Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-15 07:14:59 +08:00)
[CI/Build] Remove V0 LoRA test (#19066)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>

commit 4e68ae5e59
parent 4e88723f32
@@ -6,6 +6,8 @@ import pytest
 
 import vllm.envs as env
 from vllm.engine.arg_utils import AsyncEngineArgs
+from vllm.entrypoints.openai.api_server import (
+    build_async_engine_client_from_engine_args)
 from vllm.inputs import TextPrompt
 from vllm.lora.request import LoRARequest
 from vllm.sampling_params import SamplingParams
@@ -16,14 +18,6 @@ LORA_RANK = 64
 DEFAULT_MAX_LORAS = 4 * 3
 
 
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def get_lora_requests(lora_path) -> list[LoRARequest]:
     lora_requests: list[LoRARequest] = [
         LoRARequest(lora_name=f"{i}", lora_int_id=i, lora_path=lora_path)
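The context lines above break off inside get_lora_requests. For readers following along, here is a minimal sketch of the whole helper; the range(1, DEFAULT_MAX_LORAS + 1) loop clause is an assumption, since only the first line of the comprehension is visible in this hunk.

# Hedged sketch of the helper shown truncated in the hunk above; the loop
# bounds are assumed, not taken from the diff.
from vllm.lora.request import LoRARequest

DEFAULT_MAX_LORAS = 4 * 3


def get_lora_requests(lora_path) -> list[LoRARequest]:
    # One request per adapter id, all pointing at the same adapter files.
    lora_requests: list[LoRARequest] = [
        LoRARequest(lora_name=f"{i}", lora_int_id=i, lora_path=lora_path)
        for i in range(1, DEFAULT_MAX_LORAS + 1)
    ]
    return lora_requests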
@@ -88,17 +82,6 @@ async def test_add_lora(chatglm3_lora_files):
                                   trust_remote_code=True,
                                   enforce_eager=True)
 
-    # The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
-    # environment variable. reload vllm.enging.async_llm_engine as
-    # vllm.engine.async_llm_engine.AsyncLLMEgnine changes depending on the
-    # env var.
-    import importlib
-
-    import vllm.engine.async_llm_engine
-    importlib.reload(vllm.engine.async_llm_engine)
-    from vllm.entrypoints.openai.api_server import (
-        build_async_engine_client_from_engine_args)
-
     # split lora_requests into 3 parts
     part_size = len(lora_requests) // 3
     dummy_run_requests = lora_requests[:part_size]
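Taken together, the hunks above replace the per-test importlib.reload() workaround with a single module-level import. Below is a minimal sketch of the resulting call pattern, assuming build_async_engine_client_from_engine_args is used as an async context manager the way the surrounding test does; the model name, adapter path, and engine arguments are placeholders, not values from this diff.

# Hedged sketch only: model/adapter values are placeholders, and the exact
# AsyncEngineArgs used by the real test are not reproduced here.
import asyncio

from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.entrypoints.openai.api_server import (
    build_async_engine_client_from_engine_args)
from vllm.lora.request import LoRARequest


async def add_one_adapter() -> None:
    engine_args = AsyncEngineArgs(model="some/base-model",  # placeholder
                                  enable_lora=True,
                                  max_loras=4,
                                  max_lora_rank=64,
                                  enforce_eager=True)
    # The helper is an async context manager that yields a running engine
    # client, so no reload of vllm.engine.async_llm_engine is needed.
    async with build_async_engine_client_from_engine_args(engine_args) as client:
        await client.add_lora(
            LoRARequest(lora_name="1", lora_int_id=1,
                        lora_path="/path/to/adapter"))  # placeholder path


if __name__ == "__main__":
    asyncio.run(add_one_adapter())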

@@ -1,7 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
 
-import pytest
-
 import vllm
 from vllm.lora.request import LoRARequest
 
@@ -18,14 +16,6 @@ EXPECTED_LORA_OUTPUT = [
 ]
 
 
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int) -> list[str]:
     prompts = [
         PROMPT_TEMPLATE.format(query="How many singers do we have?"),

@@ -33,14 +33,6 @@ EXPECTED_LORA_OUTPUT = [
 ]
 
 
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def do_sample(llm: vllm.LLM,
               lora_path: str,
               lora_id: int,

@@ -2,26 +2,24 @@
 """
 Script to test add_lora, remove_lora, pin_lora, list_loras functions.
 """
 
-import os
-
 import pytest
 
 from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
 from vllm.engine.llm_engine import LLMEngine
+from vllm.entrypoints.openai.api_server import (
+    build_async_engine_client_from_engine_args)
 from vllm.lora.request import LoRARequest
 
 MODEL_PATH = "meta-llama/Llama-2-7b-hf"
 LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test"
 LORA_RANK = 8
 
-
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
+# @pytest.fixture(autouse=True)
+# def v1(run_with_both_engines_lora):
+#     # Simple autouse wrapper to run both engines for each test
+#     # This can be promoted up to conftest.py to run for every
+#     # test in a package
+#     pass
 
 def make_lora_request(lora_id: int):
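For reference, the four operations named in the file's docstring map onto the synchronous engine API that this hunk still imports (EngineArgs, LLMEngine). The sketch below is an assumption about how a sync test might exercise them, reusing the constants visible in the hunk; it is not the file's actual test body.

# Hedged sketch, not the test's real body: engine arguments beyond the
# constants visible in the hunk above are guesses.
from vllm.engine.arg_utils import EngineArgs
from vllm.engine.llm_engine import LLMEngine
from vllm.lora.request import LoRARequest

MODEL_PATH = "meta-llama/Llama-2-7b-hf"
LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test"
LORA_RANK = 8

engine = LLMEngine.from_engine_args(
    EngineArgs(model=MODEL_PATH,
               enable_lora=True,
               max_loras=4,
               max_lora_rank=LORA_RANK,
               enforce_eager=True))

engine.add_lora(LoRARequest("adapter", 1, LORA_MODULE_PATH))
assert 1 in engine.list_loras()  # list_loras returns the registered ids
engine.pin_lora(1)               # pin so the adapter is not evicted
engine.remove_lora(1)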
@@ -79,22 +77,6 @@ def test_lora_functions_sync():
 @pytest.mark.asyncio
 async def test_lora_functions_async():
 
-    if os.getenv("VLLM_USE_V1") == "0":
-        pytest.skip(
-            reason=
-            "V0 AsyncLLMEngine does not expose remove/list/pin LoRA functions")
-
-    # The run_with_both_engines_lora fixture sets up the `VLLM_USE_V1`
-    # environment variable. reload vllm.enging.async_llm_engine as
-    # vllm.engine.async_llm_engine.AsyncLLMEgnine changes depending on the
-    # env var.
-    import importlib
-
-    import vllm.engine.async_llm_engine
-    importlib.reload(vllm.engine.async_llm_engine)
-    from vllm.entrypoints.openai.api_server import (
-        build_async_engine_client_from_engine_args)
-
     max_loras = 4
     engine_args = AsyncEngineArgs(model=MODEL_PATH,
                                   enable_lora=True,

@@ -10,14 +10,6 @@ from vllm.platforms import current_platform
 MODEL_PATH = "mistralai/Mixtral-8x7B-Instruct-v0.1"
 
 
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int,
               prompts: list[str]) -> list[str]:
 

@@ -37,14 +37,6 @@ else:
 ]
 
 
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 def do_sample(llm: vllm.LLM,
               lora_path: str,
               lora_id: int,

@@ -13,14 +13,6 @@ from vllm.platforms import current_platform
 from vllm.sampling_params import BeamSearchParams
 
 
-@pytest.fixture(autouse=not current_platform.is_cpu())
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 @dataclass
 class TestConfig:
     model_path: str

@@ -6,8 +6,6 @@ import tempfile
 from typing import Union
 from unittest.mock import patch
 
-import pytest
-
 import vllm.envs as envs
 from vllm.config import (CacheConfig, DeviceConfig, LoadConfig, LoRAConfig,
                          ModelConfig, ParallelConfig, SchedulerConfig,
@@ -18,14 +16,6 @@ from vllm.v1.worker.gpu_worker import Worker as V1Worker
 from vllm.worker.worker import Worker
 
 
-@pytest.fixture(autouse=True)
-def v1(run_with_both_engines_lora):
-    # Simple autouse wrapper to run both engines for each test
-    # This can be promoted up to conftest.py to run for every
-    # test in a package
-    pass
-
-
 @patch.dict(os.environ, {"RANK": "0"})
 def test_worker_apply_lora(sql_lora_files):
 