[CI/Build] [3/3] Reorganize entrypoints tests (#5966)
commit 9d47f64eb6
parent cff6a1fec1
@@ -89,8 +89,8 @@ steps:
   mirror_hardwares: [amd]

   commands:
-  - pytest -v -s entrypoints -m llm
-  - pytest -v -s entrypoints -m openai
+  - pytest -v -s entrypoints/llm
+  - pytest -v -s entrypoints/openai

 - label: Examples Test
   working_dir: "/vllm-workspace/examples"
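The CI step now selects tests by directory instead of by pytest marker. For illustration only, the same two selection styles expressed through pytest's Python entry point (paths assumed relative to the tests/ working directory, as in the CI step):

import pytest

# Old style: collect everything under entrypoints/ and keep only tests tagged "llm".
pytest.main(["-v", "-s", "entrypoints", "-m", "llm"])

# New style: the subdirectory itself is the filter, so no marker bookkeeping is needed.
pytest.main(["-v", "-s", "entrypoints/llm"])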
@@ -69,7 +69,5 @@ skip_gitignore = true
 [tool.pytest.ini_options]
 markers = [
     "skip_global_cleanup",
-    "llm: run tests for vLLM API only",
-    "openai: run tests for OpenAI API only",
     "vlm: run tests for vision language models only",
 ]
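The llm and openai entries can be dropped from the registered markers because the per-module pytestmark tags that used them are removed in the hunks below. A minimal sketch of how that tagging worked (hypothetical test module, not from the repo):

import pytest

# A module-level pytestmark applies the marker to every test in the file,
# which is what made "pytest -m openai" select it; the new directory layout
# carries the same information, so the marker becomes redundant.
pytestmark = pytest.mark.openai


def test_smoke():
    assert True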
tests/entrypoints/llm/__init__.py (new, empty file)
@@ -5,7 +5,7 @@ import pytest

 from vllm import LLM, EmbeddingRequestOutput, PoolingParams

-from ..conftest import cleanup
+from ...conftest import cleanup

 MODEL_NAME = "intfloat/e5-mistral-7b-instruct"
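Each moved test module gains one leading dot in its relative imports because it now sits one package deeper (tests/entrypoints/llm/ rather than tests/entrypoints/). A small runnable sketch of the dot arithmetic using the standard-library resolver (package names assumed from the new layout):

from importlib.util import resolve_name

# Every leading dot climbs one package level, so reaching the same target
# module needs one more dot once the test file moves a directory deeper.
print(resolve_name("..conftest", "tests.entrypoints"))       # -> tests.conftest (old location)
print(resolve_name("...conftest", "tests.entrypoints.llm"))  # -> tests.conftest (new location)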
@@ -25,8 +25,6 @@ TOKEN_IDS = [
     [1000, 1003, 1001, 1002],
 ]

-pytestmark = pytest.mark.llm
-

 @pytest.fixture(scope="module")
 def llm():
@@ -5,7 +5,7 @@ import pytest

 from vllm import LLM, RequestOutput, SamplingParams

-from ..conftest import cleanup
+from ...conftest import cleanup

 MODEL_NAME = "facebook/opt-125m"
@@ -23,8 +23,6 @@ TOKEN_IDS = [
     [0, 3, 1, 2],
 ]

-pytestmark = pytest.mark.llm
-

 @pytest.fixture(scope="module")
 def llm():
@@ -7,7 +7,7 @@ from huggingface_hub import snapshot_download
 from vllm import LLM
 from vllm.lora.request import LoRARequest

-from ..conftest import cleanup
+from ...conftest import cleanup

 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -20,8 +20,6 @@ PROMPTS = [

 LORA_NAME = "typeof/zephyr-7b-beta-lora"

-pytestmark = pytest.mark.llm
-

 @pytest.fixture(scope="module")
 def llm():
tests/entrypoints/openai/__init__.py (new, empty file)
@@ -14,7 +14,7 @@ import torch
 from huggingface_hub import snapshot_download
 from openai import BadRequestError

-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer

 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -69,8 +69,6 @@ TEST_CHOICE = [
     "Swift", "Kotlin"
 ]

-pytestmark = pytest.mark.openai
-

 @pytest.fixture(scope="module")
 def zephyr_lora_files():
@@ -16,7 +16,7 @@ from openai import BadRequestError

 from vllm.transformers_utils.tokenizer import get_tokenizer

-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer

 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -71,8 +71,6 @@ TEST_CHOICE = [
     "Swift", "Kotlin"
 ]

-pytestmark = pytest.mark.openai
-

 @pytest.fixture(scope="module")
 def zephyr_lora_files():
@@ -2,12 +2,10 @@ import openai
 import pytest
 import ray

-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer

 EMBEDDING_MODEL_NAME = "intfloat/e5-mistral-7b-instruct"

-pytestmark = pytest.mark.openai
-

 @pytest.fixture(scope="module")
 def ray_ctx():
@@ -52,8 +52,6 @@ TEST_SCHEMA = {
 TEST_REGEX = (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}"
               r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)")

-pytestmark = pytest.mark.openai
-

 def test_guided_logits_processors():
     """Basic unit test for RegexLogitsProcessor and JSONLogitsProcessor."""
@@ -6,7 +6,7 @@ import ray
 # downloading lora to test lora requests
 from huggingface_hub import snapshot_download

-from ..utils import RemoteOpenAIServer
+from ...utils import RemoteOpenAIServer

 # any model with a chat template should work here
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -14,8 +14,6 @@ MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 # generation quality here
 LORA_NAME = "typeof/zephyr-7b-beta-lora"

-pytestmark = pytest.mark.openai
-

 @pytest.fixture(scope="module")
 def zephyr_lora_files():
@@ -1,7 +1,6 @@
 import sys
 import time

-import pytest
 import torch
 from openai import OpenAI, OpenAIError
@@ -10,8 +9,6 @@ from vllm.model_executor.models.opt import OPTForCausalLM
 from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.utils import get_open_port

-pytestmark = pytest.mark.openai
-

 class MyOPTForCausalLM(OPTForCausalLM):
@@ -1,15 +1,11 @@
 import asyncio
 from dataclasses import dataclass

-import pytest
-
 from vllm.entrypoints.openai.serving_chat import OpenAIServingChat

 MODEL_NAME = "openai-community/gpt2"
 CHAT_TEMPLATE = "Dummy chat template for testing {}"

-pytestmark = pytest.mark.openai
-

 @dataclass
 class MockModelConfig:
@@ -1,4 +1,3 @@
-from pathlib import Path
 from typing import Dict, List

 import openai
@@ -8,12 +7,12 @@ import ray

 from vllm.multimodal.utils import ImageFetchAiohttp, encode_image_base64

-from ..utils import RemoteOpenAIServer
+from ...utils import VLLM_PATH, RemoteOpenAIServer

 MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
-LLAVA_CHAT_TEMPLATE = (Path(__file__).parent.parent.parent /
-                       "examples/template_llava.jinja")
+LLAVA_CHAT_TEMPLATE = VLLM_PATH / "examples/template_llava.jinja"
+assert LLAVA_CHAT_TEMPLATE.exists()

 # Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
 TEST_IMAGE_URLS = [
     "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
@@ -22,8 +21,6 @@ TEST_IMAGE_URLS = [
     "https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
 ]

-pytestmark = pytest.mark.openai
-

 @pytest.fixture(scope="module")
 def ray_ctx():
@@ -279,7 +276,3 @@ async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
     )
     completion = completion.choices[0].text
     assert completion is not None and len(completion) >= 0
-
-
-if __name__ == "__main__":
-    pytest.main([__file__])
@@ -4,7 +4,8 @@ import sys
 import time
 import warnings
 from contextlib import contextmanager
-from typing import Dict, List
+from pathlib import Path
+from typing import Any, Dict, List

 import openai
 import ray
@@ -40,8 +41,8 @@ else:
             nvmlShutdown()


-# Path to root of repository so that utilities can be imported by ray workers
-VLLM_PATH = os.path.abspath(os.path.join(__file__, os.pardir, os.pardir))
+VLLM_PATH = Path(__file__).parent.parent
+"""Path to root of the vLLM repository."""


 class RemoteOpenAIServer:
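VLLM_PATH changes from an os.path string to a pathlib.Path, which is what lets the vision test above write VLLM_PATH / "examples/template_llava.jinja" and call .exists() on it. A sketch of the equivalence, with the file location assumed purely for illustration:

import os
from pathlib import Path

here = "/vllm-workspace/tests/utils.py"  # hypothetical location of tests/utils.py

old = os.path.abspath(os.path.join(here, os.pardir, os.pardir))  # plain str
new = Path(here).parent.parent                                   # pathlib.Path

print(old)              # /vllm-workspace
print(new)              # /vllm-workspace
print(old == str(new))  # True; only the type changes, gaining "/" joins and .exists()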
@@ -153,10 +154,12 @@ def init_test_distributed_environment(
 def multi_process_parallel(
     tp_size: int,
     pp_size: int,
-    test_target,
+    test_target: Any,
 ) -> None:
     # Using ray helps debugging the error when it failed
     # as compared to multiprocessing.
+    # NOTE: We need to set working_dir for distributed tests,
+    # otherwise we may get import errors on ray workers
     ray.init(runtime_env={"working_dir": VLLM_PATH})

     distributed_init_port = get_open_port()
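The added NOTE comment refers to Ray's runtime_env working_dir mechanism: the given directory is uploaded to every Ray worker, so repository-local helper modules can still be imported inside remote tasks instead of failing with ImportError. A minimal, self-contained sketch (the checkout path is a placeholder, not the vLLM test itself):

import ray

# working_dir is shipped to each worker, making modules inside it importable there.
ray.init(runtime_env={"working_dir": "/path/to/vllm"})  # placeholder path


@ray.remote
def can_import_repo_modules() -> bool:
    import importlib.util
    # Resolves against the uploaded working_dir on the worker.
    return importlib.util.find_spec("tests") is not None


print(ray.get(can_import_repo_modules.remote()))
ray.shutdown()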