mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-14 01:45:01 +08:00
[V1] Minor V1 async engine test refactor (#15075)
Signed-off-by: andoorve <murali.andoorveedu@mail.utoronto.ca>
Co-authored-by: andoorve <murali.andoorveedu@mail.utoronto.ca>
This commit is contained in:
parent
374ee287d8
commit
61c7a1b856
@ -76,21 +76,18 @@ async def generate(engine: AsyncLLM,
|
|||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
||||||
@pytest.mark.parametrize("engine_args_and_prompt",
|
@pytest.mark.parametrize("engine_args,prompt",
|
||||||
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
||||||
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_load(
|
async def test_load(monkeypatch: pytest.MonkeyPatch,
|
||||||
monkeypatch: pytest.MonkeyPatch,
|
output_kind: RequestOutputKind,
|
||||||
output_kind: RequestOutputKind,
|
engine_args: AsyncEngineArgs, prompt: PromptType):
|
||||||
engine_args_and_prompt: tuple[AsyncEngineArgs, PromptType],
|
|
||||||
):
|
|
||||||
# TODO(rickyx): Remove monkeypatch once we have a better way to test V1
|
# TODO(rickyx): Remove monkeypatch once we have a better way to test V1
|
||||||
# so that in the future when we switch, we don't have to change all the
|
# so that in the future when we switch, we don't have to change all the
|
||||||
# tests.
|
# tests.
|
||||||
with monkeypatch.context() as m, ExitStack() as after:
|
with monkeypatch.context() as m, ExitStack() as after:
|
||||||
m.setenv("VLLM_USE_V1", "1")
|
m.setenv("VLLM_USE_V1", "1")
|
||||||
engine_args, prompt = engine_args_and_prompt
|
|
||||||
|
|
||||||
engine = AsyncLLM.from_engine_args(engine_args)
|
engine = AsyncLLM.from_engine_args(engine_args)
|
||||||
after.callback(engine.shutdown)
|
after.callback(engine.shutdown)
|
||||||
@ -124,18 +121,16 @@ async def test_load(
|
|||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
"output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
|
||||||
@pytest.mark.parametrize("engine_args_and_prompt",
|
@pytest.mark.parametrize("engine_args,prompt",
|
||||||
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
||||||
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_abort(monkeypatch: pytest.MonkeyPatch,
|
async def test_abort(monkeypatch: pytest.MonkeyPatch,
|
||||||
output_kind: RequestOutputKind,
|
output_kind: RequestOutputKind,
|
||||||
engine_args_and_prompt: tuple[AsyncEngineArgs,
|
engine_args: AsyncEngineArgs, prompt: PromptType):
|
||||||
PromptType]):
|
|
||||||
|
|
||||||
with monkeypatch.context() as m, ExitStack() as after:
|
with monkeypatch.context() as m, ExitStack() as after:
|
||||||
m.setenv("VLLM_USE_V1", "1")
|
m.setenv("VLLM_USE_V1", "1")
|
||||||
engine_args, prompt = engine_args_and_prompt
|
|
||||||
|
|
||||||
engine = AsyncLLM.from_engine_args(engine_args)
|
engine = AsyncLLM.from_engine_args(engine_args)
|
||||||
after.callback(engine.shutdown)
|
after.callback(engine.shutdown)
|
||||||
@ -193,17 +188,15 @@ async def test_abort(monkeypatch: pytest.MonkeyPatch,
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("n", [1, 3])
|
@pytest.mark.parametrize("n", [1, 3])
|
||||||
@pytest.mark.parametrize("engine_args_and_prompt",
|
@pytest.mark.parametrize("engine_args,prompt",
|
||||||
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
[(TEXT_ENGINE_ARGS, TEXT_PROMPT),
|
||||||
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
(VISION_ENGINE_ARGS, VISION_PROMPT)])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_finished_flag(monkeypatch, n: int,
|
async def test_finished_flag(monkeypatch: pytest.MonkeyPatch, n: int,
|
||||||
engine_args_and_prompt: tuple[AsyncEngineArgs,
|
engine_args: AsyncEngineArgs, prompt: PromptType):
|
||||||
PromptType]):
|
|
||||||
|
|
||||||
with monkeypatch.context() as m, ExitStack() as after:
|
with monkeypatch.context() as m, ExitStack() as after:
|
||||||
m.setenv("VLLM_USE_V1", "1")
|
m.setenv("VLLM_USE_V1", "1")
|
||||||
engine_args, prompt = engine_args_and_prompt
|
|
||||||
|
|
||||||
engine = AsyncLLM.from_engine_args(engine_args)
|
engine = AsyncLLM.from_engine_args(engine_args)
|
||||||
after.callback(engine.shutdown)
|
after.callback(engine.shutdown)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user