mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-08 03:42:26 +08:00
Adding "AMD: Multi-step Tests" to amdproduction. (#19508)
Signed-off-by: Yida Wu <yidawu@alumni.cmu.edu>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
This commit is contained in:
parent
aafbbd981f
commit
d65668b4e8
@ -675,7 +675,7 @@ steps:
|
|||||||
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
|
- pytest -v -s plugins/lora_resolvers # unit tests for in-tree lora resolver plugins
|
||||||
|
|
||||||
- label: Multi-step Tests (4 GPUs) # 36min
|
- label: Multi-step Tests (4 GPUs) # 36min
|
||||||
mirror_hardwares: [amdexperimental]
|
mirror_hardwares: [amdexperimental, amdproduction]
|
||||||
working_dir: "/vllm-workspace/tests"
|
working_dir: "/vllm-workspace/tests"
|
||||||
num_gpus: 4
|
num_gpus: 4
|
||||||
source_file_dependencies:
|
source_file_dependencies:
|
||||||
|
|||||||
@ -8,6 +8,7 @@ from typing import Optional
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from vllm.platforms import current_platform
|
||||||
from vllm.utils import STR_BACKEND_ENV_VAR
|
from vllm.utils import STR_BACKEND_ENV_VAR
|
||||||
|
|
||||||
from ..models.utils import check_logprobs_close, check_outputs_equal
|
from ..models.utils import check_logprobs_close, check_outputs_equal
|
||||||
@ -71,6 +72,12 @@ def test_multi_step_llm(
|
|||||||
num_logprobs: corresponds to the `logprobs` argument to the OpenAI
|
num_logprobs: corresponds to the `logprobs` argument to the OpenAI
|
||||||
completions endpoint; `None` -> 1 logprob returned.
|
completions endpoint; `None` -> 1 logprob returned.
|
||||||
"""
|
"""
|
||||||
|
if current_platform.is_rocm() and \
|
||||||
|
(attention_backend == "FLASHINFER" or enable_chunked_prefill):
|
||||||
|
pytest.skip(
|
||||||
|
"Multi-Step with FLASHINFER or Chunked-Prefill is not supported"
|
||||||
|
"on ROCm")
|
||||||
|
|
||||||
with monkeypatch.context() as m:
|
with monkeypatch.context() as m:
|
||||||
m.setenv(STR_BACKEND_ENV_VAR, attention_backend)
|
m.setenv(STR_BACKEND_ENV_VAR, attention_backend)
|
||||||
|
|
||||||
@ -221,6 +228,9 @@ def test_multi_step_llm_w_prompt_logprobs(
|
|||||||
@pytest.mark.parametrize("num_prompts", NUM_PROMPTS)
|
@pytest.mark.parametrize("num_prompts", NUM_PROMPTS)
|
||||||
@pytest.mark.parametrize("num_logprobs", [None, 5])
|
@pytest.mark.parametrize("num_logprobs", [None, 5])
|
||||||
@pytest.mark.parametrize("attention_backend", ["FLASH_ATTN"])
|
@pytest.mark.parametrize("attention_backend", ["FLASH_ATTN"])
|
||||||
|
@pytest.mark.skipif(
|
||||||
|
current_platform.is_rocm(),
|
||||||
|
reason="Multi-Step + Chunked-Prefill not supported on ROCm")
|
||||||
def test_multi_step_llm_chunked_prefill_prefix_cache(
|
def test_multi_step_llm_chunked_prefill_prefix_cache(
|
||||||
vllm_runner,
|
vllm_runner,
|
||||||
example_prompts,
|
example_prompts,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user