[CI] Move applicable tests to CPU (#24080)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Authored by Reza Barazesh on 2025-09-30 09:45:20 -04:00; committed by GitHub
parent 80608ba5af
commit bc546f76a1
39 changed files with 136 additions and 28 deletions


@@ -50,19 +50,28 @@ steps:
mirror_hardwares: [amdexperimental]
source_file_dependencies:
- vllm/
- tests/multimodal
- tests/utils_
commands:
- pytest -v -s -m 'not cpu_test' multimodal
- pytest -v -s utils_
- label: Async Engine, Inputs, Utils, Worker Test (CPU) # 4 mins
timeout_in_minutes: 10
source_file_dependencies:
- vllm/
- tests/test_inputs.py
- tests/test_outputs.py
- tests/multimodal
- tests/utils_
- tests/standalone_tests/lazy_imports.py
- tests/transformers_utils
no_gpu: true
commands:
- python3 standalone_tests/lazy_imports.py
- pytest -v -s test_inputs.py
- pytest -v -s test_outputs.py
- pytest -v -s multimodal
- pytest -v -s utils_ # Utils
- pytest -v -s transformers_utils # transformers_utils
- pytest -v -s -m 'cpu_test' multimodal
- pytest -v -s transformers_utils
- label: Python-only Installation Test # 10min
timeout_in_minutes: 20
@@ -287,23 +296,34 @@ steps:
- tests/v1
commands:
# split the test to avoid interference
- pytest -v -s v1/core
- pytest -v -s v1/executor
- pytest -v -s v1/kv_offload
- pytest -v -s v1/sample
- pytest -v -s v1/logits_processors
- pytest -v -s v1/worker
- pytest -v -s v1/structured_output
- pytest -v -s v1/spec_decode
- pytest -v -s v1/kv_connector/unit
- pytest -v -s v1/metrics
- pytest -v -s -m 'not cpu_test' v1/kv_connector/unit
- pytest -v -s -m 'not cpu_test' v1/metrics
- pytest -v -s v1/test_oracle.py
- pytest -v -s v1/test_request.py
- pytest -v -s v1/test_serial_utils.py
# Integration test for streaming correctness (requires special branch).
- pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api
- pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine
- label: V1 Test others (CPU) # 5 mins
source_file_dependencies:
- vllm/
- tests/v1
no_gpu: true
commands:
# split the test to avoid interference
- pytest -v -s v1/core
- pytest -v -s v1/structured_output
- pytest -v -s v1/test_serial_utils.py
- pytest -v -s -m 'cpu_test' v1/kv_connector/unit
- pytest -v -s -m 'cpu_test' v1/metrics
- label: Examples Test # 30min
timeout_in_minutes: 45
mirror_hardwares: [amdexperimental]
@@ -533,10 +553,17 @@ steps:
source_file_dependencies:
- vllm/
- tests/tool_use
- tests/mistral_tool_use
commands:
- pytest -v -s tool_use
- pytest -v -s mistral_tool_use
- pytest -v -s -m 'not cpu_test' tool_use
- label: OpenAI-Compatible Tool Use (CPU) # 5 mins
timeout_in_minutes: 10
source_file_dependencies:
- vllm/
- tests/tool_use
no_gpu: true
commands:
- pytest -v -s -m 'cpu_test' tool_use
##### models test #####
@@ -576,13 +603,19 @@
- vllm/
- tests/models/test_transformers.py
- tests/models/test_registry.py
commands:
- pytest -v -s models/test_transformers.py models/test_registry.py
- label: Basic Models Test (Other CPU) # 5min
timeout_in_minutes: 10
torch_nightly: true
source_file_dependencies:
- vllm/
- tests/models/test_utils.py
- tests/models/test_vision.py
no_gpu: true
commands:
- pytest -v -s models/test_transformers.py \
models/test_registry.py \
models/test_utils.py \
models/test_vision.py
- pytest -v -s models/test_utils.py models/test_vision.py
- label: Language Models Tests (Standard)
timeout_in_minutes: 25

.github/mergify.yml

@@ -239,7 +239,6 @@ pull_request_rules:
conditions:
- or:
- files~=^tests/tool_use/
- files~=^tests/mistral_tool_use/
- files~=^tests/entrypoints/openai/tool_parsers/
- files=tests/entrypoints/openai/test_chat_with_tool_reasoning.py
- files~=^vllm/entrypoints/openai/tool_parsers/


@@ -47,7 +47,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV UV_HTTP_TIMEOUT=500
# Install Python dependencies
# Install Python dependencies
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
@@ -104,7 +104,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
######################### TEST DEPS #########################
FROM base AS vllm-test-deps
@@ -117,7 +117,7 @@ RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -r requirements/cpu-test.txt
uv pip install -r requirements/cpu-test.txt
######################### DEV IMAGE #########################
FROM vllm-build AS vllm-dev
@@ -130,12 +130,12 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e tests/vllm_test_utils
uv pip install -e tests/vllm_test_utils
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py develop
VLLM_TARGET_DEVICE=cpu python3 setup.py develop
COPY --from=vllm-test-deps /workspace/vllm/requirements/cpu-test.txt requirements/test.txt
@@ -160,11 +160,12 @@ ADD ./benchmarks/ ./benchmarks/
ADD ./vllm/collect_env.py .
ADD ./.buildkite/ ./.buildkite/
# Create symlink for vllm-workspace to maintain CI compatibility
RUN ln -sf /workspace /vllm-workspace
# install development dependencies (for testing)
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e tests/vllm_test_utils
ENTRYPOINT ["bash"]
uv pip install -e tests/vllm_test_utils
######################### RELEASE IMAGE #########################
FROM base AS vllm-openai


@@ -126,6 +126,7 @@ markers = [
"core_model: enable this model test in each PR instead of only nightly",
"hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)",
"cpu_model: enable this model test in CPU tests",
"cpu_test: mark test as CPU-only test",
"split: run this test as part of a split",
"distributed: run this test only in distributed GPU tests",
"skip_v1: do not run this test with v1",

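The pattern this commit applies across the test suite can be summarized with a short sketch (illustrative only, not part of the diff; the module path and test name are hypothetical). A test module opts into the CPU-only lane by setting a module-level `pytestmark` with the newly registered `cpu_test` marker, and the Buildkite steps then select or exclude those tests with pytest's `-m` filter:

```python
# tests/some_module/test_example.py  (hypothetical path, for illustration)
import pytest

# Marks every test in this module as CPU-only, matching the marker
# registered in pyproject.toml ("cpu_test: mark test as CPU-only test").
pytestmark = pytest.mark.cpu_test


def test_runs_without_gpu():
    # A trivial check that needs no GPU.
    assert 1 + 1 == 2
```

GPU steps run `pytest -v -s -m 'not cpu_test' <dir>` while the new `no_gpu: true` steps run `pytest -v -s -m 'cpu_test' <dir>`, so each test executes in exactly one lane.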

@@ -1,10 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
from vllm.model_executor.models.utils import AutoWeightsLoader
pytestmark = pytest.mark.cpu_test
class ModuleWithBatchNorm(torch.nn.Module):


@@ -16,6 +16,8 @@ from vllm.model_executor.models.vision import (
from vllm.platforms import current_platform
from vllm.utils import get_open_port, update_environment_variables
pytestmark = pytest.mark.cpu_test
@pytest.mark.parametrize(
("select_layers", "num_layers_loaded", "max_possible_layers",


@@ -19,6 +19,8 @@ from vllm.multimodal.inputs import (MultiModalFieldElem, MultiModalKwargsItem,
MultiModalSharedField)
from vllm.multimodal.processing import PromptInsertion
pytestmark = pytest.mark.cpu_test
def _dummy_elem(
modality: str,


@@ -10,6 +10,8 @@ from PIL import Image, ImageDraw
from vllm.multimodal.hasher import MultiModalHasher
pytestmark = pytest.mark.cpu_test
ASSETS_DIR = Path(__file__).parent / "assets"
assert ASSETS_DIR.exists()


@@ -8,6 +8,8 @@ from PIL import Image, ImageChops
from vllm.multimodal.image import ImageMediaIO, convert_image_mode
pytestmark = pytest.mark.cpu_test
ASSETS_DIR = Path(__file__).parent / "assets"
assert ASSETS_DIR.exists()


@@ -1,10 +1,13 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
import torch
from vllm.multimodal.inputs import MultiModalKwargs, NestedTensors
pytestmark = pytest.mark.cpu_test
def assert_nested_tensors_equal(expected: NestedTensors,
actual: NestedTensors):


@@ -25,6 +25,8 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer
from .utils import random_image
pytestmark = pytest.mark.cpu_test
# yapf: disable
@pytest.mark.parametrize(


@@ -11,6 +11,8 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
from ..models.utils import build_model_context
pytestmark = pytest.mark.cpu_test
@pytest.mark.parametrize(
"model_id,limit_mm_per_prompt,expected",


@@ -17,6 +17,8 @@ from vllm.multimodal.video import (VIDEO_LOADER_REGISTRY, VideoLoader,
from .utils import cosine_similarity, create_video_from_image, normalize_image
pytestmark = pytest.mark.cpu_test
NUM_FRAMES = 10
FAKE_OUTPUT_1 = np.random.rand(NUM_FRAMES, 1280, 720, 3)
FAKE_OUTPUT_2 = np.random.rand(NUM_FRAMES, 1280, 720, 3)


@@ -6,6 +6,8 @@ import pytest
from vllm.inputs import zip_enc_dec_prompts
from vllm.inputs.parse import parse_and_batch_prompt
pytestmark = pytest.mark.cpu_test
STRING_INPUTS = [
'',
'foo',


@@ -1,8 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from vllm.outputs import RequestOutput
pytestmark = pytest.mark.cpu_test
def test_request_output_forward_compatible():
output = RequestOutput(request_id="test_request_id",


@@ -12,7 +12,7 @@ from .utils import ARGS, CONFIGS, ServerConfig
# for each server config, download the model and return the config
@pytest.fixture(scope="session", params=CONFIGS.keys())
@pytest.fixture(scope="package", params=CONFIGS.keys())
def server_config(request):
config = CONFIGS[request.param]
@@ -26,7 +26,7 @@ def server_config(request):
# run this for each server config
@pytest.fixture(scope="session")
@pytest.fixture(scope="package")
def server(request, server_config: ServerConfig):
model = server_config["model"]
args_for_model = server_config["arguments"]
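For context on the fixture change above, a minimal sketch of what `scope="package"` means in pytest follows (illustrative only; `_DummyServer` is a stand-in, not vLLM's actual test-server utility). A package-scoped fixture is built once for all tests in a package and torn down when that package's tests finish, rather than living for the entire pytest session:

```python
import pytest


class _DummyServer:
    """Stand-in for the real OpenAI-compatible test server (illustration only)."""

    def shutdown(self) -> None:
        pass


@pytest.fixture(scope="package")
def server():
    # Built once for all tests in this package (e.g. tests/tool_use/),
    # then torn down when the package's tests finish, instead of
    # persisting for the whole pytest session as a session-scoped
    # fixture would.
    srv = _DummyServer()
    yield srv
    srv.shutdown()
```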


@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers import Glm4MoeModelToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
pytest.skip("skip glm4_moe parser test", allow_module_level=True)
# Use a common model that is likely to be available
MODEL = "zai-org/GLM-4.5"


@@ -15,6 +15,8 @@ from vllm.entrypoints.openai.tool_parsers import JambaToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
MODEL = "ai21labs/Jamba-tiny-dev"


@@ -10,6 +10,8 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
from vllm.entrypoints.openai.tool_parsers import KimiK2ToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "moonshotai/Kimi-K2-Instruct"


@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionToolsParam,
from vllm.entrypoints.openai.tool_parsers import MinimaxToolParser
from vllm.transformers_utils.tokenizer import get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "MiniMaxAi/MiniMax-M1-40k"


@@ -18,6 +18,8 @@ from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import (
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"


@@ -16,6 +16,8 @@ from vllm.entrypoints.openai.tool_parsers import SeedOssToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct"


@@ -12,6 +12,8 @@ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
ChatCompletionToolsParam)
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
pytestmark = pytest.mark.cpu_test
EXAMPLE_TOOLS = [
{
"type": "function",


@@ -14,6 +14,8 @@ from vllm.entrypoints.openai.tool_parsers import xLAMToolParser
from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally
from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer
pytestmark = pytest.mark.cpu_test
# Use a common model that is likely to be available
MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"


@@ -11,6 +11,8 @@ from vllm.v1.utils import ConstantList
from .utils import create_requests, create_scheduler
pytestmark = pytest.mark.cpu_test
def _make_model_runner_output(
scheduler_output: SchedulerOutput, ) -> ModelRunnerOutput:


@@ -1,9 +1,12 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange
from vllm.v1.core.encoder_cache_manager import EncoderCacheManager
pytestmark = pytest.mark.cpu_test
# ------------------ Mock Classes ------------------ #
class MockRequest:


@@ -32,6 +32,8 @@ from vllm.v1.request import Request
# yapf: enable
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True)
def _auto_init_hash_fn(request):


@@ -25,6 +25,8 @@ from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId,
from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
KVCacheGroupSpec, SlidingWindowSpec)
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True)
def _auto_init_hash_fn(request):
@@ -1267,7 +1269,7 @@ def test_kv_cache_events(blocks_to_cache: int):
def test_eagle_enabled_removes_last_block():
"""Verify Eagle does NOT remove blocks when request
"""Verify Eagle does NOT remove blocks when request
length is divisible by block size."""
block_size = 16
manager = KVCacheManager(


@@ -23,6 +23,8 @@ from vllm.v1.structured_output.request import StructuredOutputRequest
from .utils import EOS_TOKEN_ID, create_requests, create_scheduler
pytestmark = pytest.mark.cpu_test
def test_add_requests():
scheduler = create_scheduler()


@@ -3,6 +3,7 @@
import random
import pytest
import torch
from vllm.v1.core.block_pool import BlockPool
@@ -13,6 +14,8 @@ from vllm.v1.core.single_type_kv_cache_manager import (
from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec,
SlidingWindowSpec)
pytestmark = pytest.mark.cpu_test
def get_sliding_window_manager(sliding_window_spec, block_pool):
return SlidingWindowManager(sliding_window_spec,


@@ -3,9 +3,13 @@
from concurrent.futures import Future
from typing import Optional
import pytest
from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator
from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput
pytestmark = pytest.mark.cpu_test
class DummyModelRunnerOutput(ModelRunnerOutput):


@@ -2,12 +2,16 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy
import pytest
from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
from vllm.v1.request import FinishReason, RequestStatus
from .utils import (assert_scheduler_empty, create_model_runner_output,
create_request, create_scheduler, create_vllm_config)
pytestmark = pytest.mark.cpu_test
def test_basic_lifecycle():
"""Test lifecycle of a Remote Decode request."""


@@ -2,12 +2,16 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import copy
import pytest
from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput
from vllm.v1.request import FinishReason, RequestStatus
from .utils import (assert_scheduler_empty, create_model_runner_output,
create_request, create_scheduler, create_vllm_config)
pytestmark = pytest.mark.cpu_test
def test_basic_lifecycle():
"""Test lifecycle of a remote prefill."""


@@ -7,6 +7,8 @@ import pytest
from vllm.v1.metrics.reader import (Counter, Gauge, Histogram, Vector,
get_metrics_snapshot)
pytestmark = pytest.mark.cpu_test
@pytest.fixture(autouse=True)
def test_registry(monkeypatch):


@@ -6,6 +6,8 @@ import pytest
from vllm.v1.structured_output.backend_xgrammar import (
has_xgrammar_unsupported_json_features)
pytestmark = pytest.mark.cpu_test
@pytest.fixture
def unsupported_string_schemas():


@@ -16,6 +16,8 @@ from vllm.multimodal.inputs import (MultiModalBatchedField,
MultiModalSharedField, NestedTensors)
from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
pytestmark = pytest.mark.cpu_test
class UnrecognizedType(UserDict):