[CI] Move applicable tests to CPU (#24080)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

parent 80608ba5af
commit bc546f76a1
@@ -50,19 +50,28 @@ steps:
   mirror_hardwares: [amdexperimental]
   source_file_dependencies:
   - vllm/
+  - tests/multimodal
+  - tests/utils_
+  commands:
+  - pytest -v -s -m 'not cpu_test' multimodal
+  - pytest -v -s utils_
+
+- label: Async Engine, Inputs, Utils, Worker Test (CPU) # 4 mins
+  timeout_in_minutes: 10
+  source_file_dependencies:
+  - vllm/
   - tests/test_inputs.py
   - tests/test_outputs.py
   - tests/multimodal
   - tests/utils_
   - tests/standalone_tests/lazy_imports.py
   - tests/transformers_utils
+  no_gpu: true
   commands:
   - python3 standalone_tests/lazy_imports.py
   - pytest -v -s test_inputs.py
   - pytest -v -s test_outputs.py
-  - pytest -v -s multimodal
-  - pytest -v -s utils_ # Utils
-  - pytest -v -s transformers_utils # transformers_utils
+  - pytest -v -s -m 'cpu_test' multimodal
+  - pytest -v -s transformers_utils

 - label: Python-only Installation Test # 10min
   timeout_in_minutes: 20
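The split above uses pytest marker expressions to partition one test tree across two CI steps: the GPU step deselects everything marked cpu_test, and the new no_gpu step collects only those tests. A minimal sketch of the mechanism (module and test names here are hypothetical, not from the commit):

import pytest

# Module-level marker: applies cpu_test to every test in this file.
pytestmark = pytest.mark.cpu_test

def test_pure_python_logic():
    # No GPU needed; safe to run on a CPU-only agent.
    assert sorted([3, 1, 2]) == [1, 2, 3]

# GPU step: pytest -v -s -m 'not cpu_test' multimodal  -> deselects this module
# CPU step: pytest -v -s -m 'cpu_test' multimodal      -> collects only marked tests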
@@ -287,23 +296,34 @@ steps:
   - tests/v1
   commands:
   # split the test to avoid interference
-  - pytest -v -s v1/core
   - pytest -v -s v1/executor
   - pytest -v -s v1/kv_offload
   - pytest -v -s v1/sample
   - pytest -v -s v1/logits_processors
   - pytest -v -s v1/worker
-  - pytest -v -s v1/structured_output
   - pytest -v -s v1/spec_decode
-  - pytest -v -s v1/kv_connector/unit
-  - pytest -v -s v1/metrics
+  - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
+  - pytest -v -s -m 'not cpu_test' v1/metrics
   - pytest -v -s v1/test_oracle.py
   - pytest -v -s v1/test_request.py
-  - pytest -v -s v1/test_serial_utils.py
   # Integration test for streaming correctness (requires special branch).
   - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
   - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
+
+- label: V1 Test others (CPU) # 5 mins
+  source_file_dependencies:
+  - vllm/
+  - tests/v1
+  no_gpu: true
+  commands:
+  # split the test to avoid interference
+  - pytest -v -s v1/core
+  - pytest -v -s v1/structured_output
+  - pytest -v -s v1/test_serial_utils.py
+  - pytest -v -s -m 'cpu_test' v1/kv_connector/unit
+  - pytest -v -s -m 'cpu_test' v1/metrics
+

 - label: Examples Test # 30min
   timeout_in_minutes: 45
   mirror_hardwares: [amdexperimental]
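The "split the test to avoid interference" comment applies to both steps: each directory runs in its own pytest process, so module-level state from one suite cannot leak into the next. A rough sketch of the same idea (the directory list is illustrative only):

import subprocess
import sys

# Run each suite in a fresh interpreter; global state, caches, and
# monkeypatched modules from one suite cannot affect the next.
for suite in ["v1/core", "v1/structured_output", "v1/test_serial_utils.py"]:
    subprocess.run([sys.executable, "-m", "pytest", "-v", "-s", suite],
                   check=True)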
@@ -533,10 +553,17 @@ steps:
   source_file_dependencies:
   - vllm/
   - tests/tool_use
-  - tests/mistral_tool_use
   commands:
-  - pytest -v -s tool_use
-  - pytest -v -s mistral_tool_use
+  - pytest -v -s -m 'not cpu_test' tool_use
+
+- label: OpenAI-Compatible Tool Use (CPU) # 5 mins
+  timeout_in_minutes: 10
+  source_file_dependencies:
+  - vllm/
+  - tests/tool_use
+  no_gpu: true
+  commands:
+  - pytest -v -s -m 'cpu_test' tool_use

 ##### models test #####

@@ -576,13 +603,19 @@ steps:
   - vllm/
   - tests/models/test_transformers.py
   - tests/models/test_registry.py
+  commands:
+  - pytest -v -s models/test_transformers.py models/test_registry.py
+
+- label: Basic Models Test (Other CPU) # 5min
+  timeout_in_minutes: 10
+  torch_nightly: true
+  source_file_dependencies:
+  - vllm/
   - tests/models/test_utils.py
   - tests/models/test_vision.py
+  no_gpu: true
   commands:
-  - pytest -v -s models/test_transformers.py \
-        models/test_registry.py \
-        models/test_utils.py \
-        models/test_vision.py
+  - pytest -v -s models/test_utils.py models/test_vision.py

 - label: Language Models Tests (Standard)
   timeout_in_minutes: 25
.github/mergify.yml
@@ -239,7 +239,6 @@ pull_request_rules:
     conditions:
       - or:
         - files~=^tests/tool_use/
-        - files~=^tests/mistral_tool_use/
         - files~=^tests/entrypoints/openai/tool_parsers/
         - files=tests/entrypoints/openai/test_chat_with_tool_reasoning.py
         - files~=^vllm/entrypoints/openai/tool_parsers/
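Mergify's files~= conditions are regular expressions matched against each changed file path, so dropping the ^tests/mistral_tool_use/ pattern stops that (now removed) directory from triggering the rule. A simplified sketch of the matching logic, not mergify's actual implementation:

import re

PATTERNS = [
    r"^tests/tool_use/",
    r"^tests/entrypoints/openai/tool_parsers/",
    r"^vllm/entrypoints/openai/tool_parsers/",
]

def rule_matches(changed_files: list[str]) -> bool:
    # The rule fires if any changed path matches any pattern.
    return any(re.search(p, f) for p in PATTERNS for f in changed_files)

assert rule_matches(["tests/tool_use/test_parsers.py"])
assert not rule_matches(["tests/mistral_tool_use/test_parsers.py"])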
@@ -47,7 +47,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
 ENV UV_HTTP_TIMEOUT=500
 
-# Install Python dependencies
+# Install Python dependencies
 ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
 ENV UV_INDEX_STRATEGY="unsafe-best-match"

@@ -104,7 +104,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
     --mount=type=bind,source=.git,target=.git \
-    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
+    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel

######################### TEST DEPS #########################
 FROM base AS vllm-test-deps

@@ -117,7 +117,7 @@ RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
     uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu
 
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -r requirements/cpu-test.txt
+    uv pip install -r requirements/cpu-test.txt

######################### DEV IMAGE #########################
 FROM vllm-build AS vllm-dev

@@ -130,12 +130,12 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
 
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -e tests/vllm_test_utils
+    uv pip install -e tests/vllm_test_utils
 
 RUN --mount=type=cache,target=/root/.cache/uv \
     --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=bind,source=.git,target=.git \
-    VLLM_TARGET_DEVICE=cpu python3 setup.py develop
+    VLLM_TARGET_DEVICE=cpu python3 setup.py develop
 
 COPY --from=vllm-test-deps /workspace/vllm/requirements/cpu-test.txt requirements/test.txt

@@ -160,11 +160,12 @@ ADD ./benchmarks/ ./benchmarks/
 ADD ./vllm/collect_env.py .
+ADD ./.buildkite/ ./.buildkite/
 
+# Create symlink for vllm-workspace to maintain CI compatibility
+RUN ln -sf /workspace /vllm-workspace
+
 # install development dependencies (for testing)
 RUN --mount=type=cache,target=/root/.cache/uv \
-    uv pip install -e tests/vllm_test_utils
-
-ENTRYPOINT ["bash"]
+    uv pip install -e tests/vllm_test_utils
 
 ######################### RELEASE IMAGE #########################
 FROM base AS vllm-openai
@@ -126,6 +126,7 @@ markers = [
     "core_model: enable this model test in each PR instead of only nightly",
     "hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)",
     "cpu_model: enable this model test in CPU tests",
+    "cpu_test: mark test as CPU-only test",
     "split: run this test as part of a split",
     "distributed: run this test only in distributed GPU tests",
     "skip_v1: do not run this test with v1",
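Registering cpu_test in the markers list keeps pytest from treating it as an unknown marker and documents it in the output of pytest --markers. If the suite also enables strict marker checking (--strict-markers; an assumption here, not shown in this diff), a typo'd marker becomes a collection error instead of a filter that silently selects nothing:

import pytest

@pytest.mark.cpu_test   # registered above, so this is valid
def test_registered():
    pass

# @pytest.mark.cpu_tset  # typo: a warning by default, an error under
# def test_typo(): ...   # --strict-markers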
@@ -1,10 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
 import torch
 
 from vllm.model_executor.models.utils import AutoWeightsLoader
 
+pytestmark = pytest.mark.cpu_test
+
 
 class ModuleWithBatchNorm(torch.nn.Module):
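Assigning to the module-level pytestmark attribute, as in the hunk above and throughout the rest of this diff, is pytest's standard way to mark every test in a file at once; it is equivalent to decorating each test individually:

import pytest

pytestmark = pytest.mark.cpu_test   # applies to the whole module

# ...which is the same as writing on every test:
@pytest.mark.cpu_test
def test_example():
    assert True

# A list applies several markers at once (second marker hypothetical):
# pytestmark = [pytest.mark.cpu_test, pytest.mark.slow]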
@@ -16,6 +16,8 @@ from vllm.model_executor.models.vision import (
 from vllm.platforms import current_platform
 from vllm.utils import get_open_port, update_environment_variables
 
+pytestmark = pytest.mark.cpu_test
+
 
 @pytest.mark.parametrize(
     ("select_layers", "num_layers_loaded", "max_possible_layers",

@@ -19,6 +19,8 @@ from vllm.multimodal.inputs import (MultiModalFieldElem, MultiModalKwargsItem,
                                     MultiModalSharedField)
 from vllm.multimodal.processing import PromptInsertion
 
+pytestmark = pytest.mark.cpu_test
+
 
 def _dummy_elem(
     modality: str,

@@ -10,6 +10,8 @@ from PIL import Image, ImageDraw
 
 from vllm.multimodal.hasher import MultiModalHasher
 
+pytestmark = pytest.mark.cpu_test
+
 
 ASSETS_DIR = Path(__file__).parent / "assets"
 assert ASSETS_DIR.exists()

@@ -8,6 +8,8 @@ from PIL import Image, ImageChops
 
 from vllm.multimodal.image import ImageMediaIO, convert_image_mode
 
+pytestmark = pytest.mark.cpu_test
+
 
 ASSETS_DIR = Path(__file__).parent / "assets"
 assert ASSETS_DIR.exists()

@@ -1,10 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
 import torch
 
 from vllm.multimodal.inputs import MultiModalKwargs, NestedTensors
 
+pytestmark = pytest.mark.cpu_test
+
 
 def assert_nested_tensors_equal(expected: NestedTensors,
                                 actual: NestedTensors):

@@ -25,6 +25,8 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer
 
 from .utils import random_image
 
+pytestmark = pytest.mark.cpu_test
+
 
 # yapf: disable
 @pytest.mark.parametrize(

@@ -11,6 +11,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
 
 from ..models.utils import build_model_context
 
+pytestmark = pytest.mark.cpu_test
+
 
 @pytest.mark.parametrize(
     "model_id,limit_mm_per_prompt,expected",

@@ -17,6 +17,8 @@ from vllm.multimodal.video import (VIDEO_LOADER_REGISTRY, VideoLoader,
 
 from .utils import cosine_similarity, create_video_from_image, normalize_image
 
+pytestmark = pytest.mark.cpu_test
+
 
 NUM_FRAMES = 10
 FAKE_OUTPUT_1 = np.random.rand(NUM_FRAMES, 1280, 720, 3)
 FAKE_OUTPUT_2 = np.random.rand(NUM_FRAMES, 1280, 720, 3)

@@ -6,6 +6,8 @@ import pytest
 from vllm.inputs import zip_enc_dec_prompts
 from vllm.inputs.parse import parse_and_batch_prompt
 
+pytestmark = pytest.mark.cpu_test
+
 
 STRING_INPUTS = [
     '',
     'foo',

@@ -1,8 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import pytest
+
 from vllm.outputs import RequestOutput
 
+pytestmark = pytest.mark.cpu_test
+
 
 def test_request_output_forward_compatible():
     output = RequestOutput(request_id="test_request_id",
@@ -12,7 +12,7 @@ from .utils import ARGS, CONFIGS, ServerConfig
 
 
 # for each server config, download the model and return the config
-@pytest.fixture(scope="session", params=CONFIGS.keys())
+@pytest.fixture(scope="package", params=CONFIGS.keys())
 def server_config(request):
     config = CONFIGS[request.param]

@@ -26,7 +26,7 @@ def server_config(request):
 
 
 # run this for each server config
-@pytest.fixture(scope="session")
+@pytest.fixture(scope="package")
 def server(request, server_config: ServerConfig):
     model = server_config["model"]
     args_for_model = server_config["arguments"]
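Narrowing these fixtures from session to package scope means each test package gets its own server instance, finalized when that package's tests finish, rather than one instance living for the entire pytest run; presumably this keeps the expensive server from outliving the tests that need it now that the suite is split across jobs. A toy illustration of the scope difference (fixture names are illustrative):

import pytest

@pytest.fixture(scope="session")
def session_resource():
    # Created once per pytest invocation, torn down at the very end.
    yield object()

@pytest.fixture(scope="package")
def package_resource():
    # Created once per test package, torn down when the package's
    # last test completes.
    yield object()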
@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
 from vllm.entrypoints.openai.tool_parsers import Glm4MoeModelToolParser
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
+pytestmark = pytest.mark.cpu_test
+
 
 pytest.skip("skip glm4_moe parser test", allow_module_level=True)
 # Use a common model that is likely to be available
 MODEL = "zai-org/GLM-4.5"

@@ -15,6 +15,8 @@ from vllm.entrypoints.openai.tool_parsers import JambaToolParser
 from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
 
+pytestmark = pytest.mark.cpu_test
+
 
 MODEL = "ai21labs/Jamba-tiny-dev"

@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
 from vllm.entrypoints.openai.tool_parsers import KimiK2ToolParser
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
+pytestmark = pytest.mark.cpu_test
+
 
 # Use a common model that is likely to be available
 MODEL = "moonshotai/Kimi-K2-Instruct"

@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam,
 from vllm.entrypoints.openai.tool_parsers import MinimaxToolParser
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
+pytestmark = pytest.mark.cpu_test
+
 
 # Use a common model that is likely to be available
 MODEL = "MiniMaxAi/MiniMax-M1-40k"

@@ -18,6 +18,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import (
 from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
 
+pytestmark = pytest.mark.cpu_test
+
 
 MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"

@@ -16,6 +16,8 @@ from vllm.entrypoints.openai.tool_parsers import SeedOssToolParser
 from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
 
+pytestmark = pytest.mark.cpu_test
+
 
 # Use a common model that is likely to be available
 MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct"

@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
                                               ChatCompletionToolsParam)
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
 
+pytestmark = pytest.mark.cpu_test
+
 
 EXAMPLE_TOOLS = [
     {
         "type": "function",

@@ -14,6 +14,8 @@ from vllm.entrypoints.openai.tool_parsers import xLAMToolParser
 from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
 from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
 
+pytestmark = pytest.mark.cpu_test
+
 
 # Use a common model that is likely to be available
 MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"
@@ -11,6 +11,8 @@ from vllm.v1.utils import ConstantList
 
 from .utils import create_requests, create_scheduler
 
+pytestmark = pytest.mark.cpu_test
+
 
 def _make_model_runner_output(
         scheduler_output: SchedulerOutput, ) -> ModelRunnerOutput:

@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
 
 from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange
 from vllm.v1.core.encoder_cache_manager import EncoderCacheManager
 
+pytestmark = pytest.mark.cpu_test
+
 
 # ------------------ Mock Classes ------------------ #
 class MockRequest:

@@ -32,6 +32,8 @@ from vllm.v1.request import Request
 
 # yapf: enable
 
+pytestmark = pytest.mark.cpu_test
+
 
 @pytest.fixture(autouse=True)
 def _auto_init_hash_fn(request):

@@ -25,6 +25,8 @@ from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId,
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                         KVCacheGroupSpec, SlidingWindowSpec)
 
+pytestmark = pytest.mark.cpu_test
+
 
 @pytest.fixture(autouse=True)
 def _auto_init_hash_fn(request):

@@ -1267,7 +1269,7 @@ def test_kv_cache_events(blocks_to_cache: int):
 
 
 def test_eagle_enabled_removes_last_block():
-    """Verify Eagle does NOT remove blocks when request
+    """Verify Eagle does NOT remove blocks when request
     length is divisible by block size."""
     block_size = 16
     manager = KVCacheManager(

@@ -23,6 +23,8 @@ from vllm.v1.structured_output.request import StructuredOutputRequest
 
 from .utils import EOS_TOKEN_ID, create_requests, create_scheduler
 
+pytestmark = pytest.mark.cpu_test
+
 
 def test_add_requests():
     scheduler = create_scheduler()

@@ -3,6 +3,7 @@
 
 import random
 
+import pytest
 import torch
 
 from vllm.v1.core.block_pool import BlockPool
@@ -13,6 +14,8 @@ from vllm.v1.core.single_type_kv_cache_manager import (
 from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec,
                                         SlidingWindowSpec)
 
+pytestmark = pytest.mark.cpu_test
+
 
 def get_sliding_window_manager(sliding_window_spec, block_pool):
     return SlidingWindowManager(sliding_window_spec,

@@ -3,9 +3,13 @@
 from concurrent.futures import Future
 from typing import Optional
 
+import pytest
+
 from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
 from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
 
+pytestmark = pytest.mark.cpu_test
+
 
 class DummyModelRunnerOutput(ModelRunnerOutput):

@@ -2,12 +2,16 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import copy
 
+import pytest
+
 from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
 from vllm.v1.request import FinishReason, RequestStatus
 
 from .utils import (assert_scheduler_empty, create_model_runner_output,
                     create_request, create_scheduler, create_vllm_config)
 
+pytestmark = pytest.mark.cpu_test
+
 
 def test_basic_lifecycle():
     """Test lifecycle of a Remote Decode request."""

@@ -2,12 +2,16 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import copy
 
+import pytest
+
 from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
 from vllm.v1.request import FinishReason, RequestStatus
 
 from .utils import (assert_scheduler_empty, create_model_runner_output,
                     create_request, create_scheduler, create_vllm_config)
 
+pytestmark = pytest.mark.cpu_test
+
 
 def test_basic_lifecycle():
     """Test lifecycle of a remote prefill."""

@@ -7,6 +7,8 @@ import pytest
 from vllm.v1.metrics.reader import (Counter, Gauge, Histogram, Vector,
                                     get_metrics_snapshot)
 
+pytestmark = pytest.mark.cpu_test
+
 
 @pytest.fixture(autouse=True)
 def test_registry(monkeypatch):

@@ -6,6 +6,8 @@ import pytest
 from vllm.v1.structured_output.backend_xgrammar import (
     has_xgrammar_unsupported_json_features)
 
+pytestmark = pytest.mark.cpu_test
+
 
 @pytest.fixture
 def unsupported_string_schemas():

@@ -16,6 +16,8 @@ from vllm.multimodal.inputs import (MultiModalBatchedField,
                                     MultiModalSharedField, NestedTensors)
 from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
 
+pytestmark = pytest.mark.cpu_test
+
 
 class UnrecognizedType(UserDict):