From a8b0361c9229c2584eea0a035e650e55e2d52f4e Mon Sep 17 00:00:00 2001
From: "wang.yuqi" <noooop@126.com>
Date: Thu, 11 Sep 2025 16:53:09 +0800
Subject: [PATCH] [CI] Split pooling from entrypoints Test (#24632)

Signed-off-by: wang.yuqi <noooop@126.com>
---
 .buildkite/test-pipeline.yaml                     | 15 ++++++++++++++-
 tests/entrypoints/pooling/__init__.py             |  0
 tests/entrypoints/pooling/correctness/__init__.py |  0
 .../correctness/test_mteb_embed.py                |  0
 .../correctness/test_mteb_score.py                |  0
 tests/entrypoints/pooling/llm/__init__.py         |  0
 .../{ => pooling}/llm/test_classify.py            |  3 +--
 .../{ => pooling}/llm/test_embedding.py           |  0
 .../entrypoints/{ => pooling}/llm/test_encode.py  |  0
 .../entrypoints/{ => pooling}/llm/test_reward.py  |  3 +--
 tests/entrypoints/{ => pooling}/llm/test_score.py |  3 +--
 tests/entrypoints/pooling/openai/__init__.py      |  0
 .../{ => pooling}/openai/test_classification.py   |  3 +--
 .../{ => pooling}/openai/test_embedding.py        |  9 ++++-----
 .../openai/test_embedding_dimensions.py           | 11 +++++------
 .../openai/test_embedding_long_text.py            |  3 +--
 .../{ => pooling}/openai/test_pooling.py          |  3 +--
 .../{ => pooling}/openai/test_rerank.py           |  3 +--
 .../{ => pooling}/openai/test_score.py            |  3 +--
 .../{ => pooling}/openai/test_truncation.py       |  0
 .../{ => pooling}/openai/test_vision_embedding.py |  3 +--
 21 files changed, 32 insertions(+), 30 deletions(-)
 create mode 100644 tests/entrypoints/pooling/__init__.py
 create mode 100644 tests/entrypoints/pooling/correctness/__init__.py
 rename tests/entrypoints/{openai => pooling}/correctness/test_mteb_embed.py (100%)
 rename tests/entrypoints/{openai => pooling}/correctness/test_mteb_score.py (100%)
 create mode 100644 tests/entrypoints/pooling/llm/__init__.py
 rename tests/entrypoints/{ => pooling}/llm/test_classify.py (98%)
 rename tests/entrypoints/{ => pooling}/llm/test_embedding.py (100%)
 rename tests/entrypoints/{ => pooling}/llm/test_encode.py (100%)
 rename tests/entrypoints/{ => pooling}/llm/test_reward.py (97%)
 rename tests/entrypoints/{ => pooling}/llm/test_score.py (97%)
 create mode 100644 tests/entrypoints/pooling/openai/__init__.py
 rename tests/entrypoints/{ => pooling}/openai/test_classification.py (99%)
 rename tests/entrypoints/{ => pooling}/openai/test_embedding.py (98%)
 rename tests/entrypoints/{ => pooling}/openai/test_embedding_dimensions.py (95%)
 rename tests/entrypoints/{ => pooling}/openai/test_embedding_long_text.py (99%)
 rename tests/entrypoints/{ => pooling}/openai/test_pooling.py (99%)
 rename tests/entrypoints/{ => pooling}/openai/test_rerank.py (99%)
 rename tests/entrypoints/{ => pooling}/openai/test_score.py (99%)
 rename tests/entrypoints/{ => pooling}/openai/test_truncation.py (100%)
 rename tests/entrypoints/{ => pooling}/openai/test_vision_embedding.py (98%)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 0dbde21f36d84..3f6b67e45de17 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -113,7 +113,7 @@ steps:
   - tests/entrypoints/
   commands:
   - pytest -v -s entrypoints/openai/tool_parsers
-  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py
+  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling
 
 - label: Entrypoints Integration Test (LLM) # 30min
   timeout_in_minutes: 40
@@ -148,6 +148,19 @@ steps:
   - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/
   - pytest -v -s entrypoints/test_chat_utils.py
 
+- label: Entrypoints Integration Test (Pooling)
+  timeout_in_minutes: 50
+  mirror_hardwares: [amdexperimental]
+  working_dir: "/vllm-workspace/tests"
+  fast_check: true
+  torch_nightly: true
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/pooling
+  commands:
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/pooling
+
 - label: Distributed Tests (4 GPUs) # 35min
   timeout_in_minutes: 50
   mirror_hardwares: [amdexperimental]
diff --git a/tests/entrypoints/pooling/__init__.py b/tests/entrypoints/pooling/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/entrypoints/pooling/correctness/__init__.py b/tests/entrypoints/pooling/correctness/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/entrypoints/openai/correctness/test_mteb_embed.py b/tests/entrypoints/pooling/correctness/test_mteb_embed.py
similarity index 100%
rename from tests/entrypoints/openai/correctness/test_mteb_embed.py
rename to tests/entrypoints/pooling/correctness/test_mteb_embed.py
diff --git a/tests/entrypoints/openai/correctness/test_mteb_score.py b/tests/entrypoints/pooling/correctness/test_mteb_score.py
similarity index 100%
rename from tests/entrypoints/openai/correctness/test_mteb_score.py
rename to tests/entrypoints/pooling/correctness/test_mteb_score.py
diff --git a/tests/entrypoints/pooling/llm/__init__.py b/tests/entrypoints/pooling/llm/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/entrypoints/llm/test_classify.py b/tests/entrypoints/pooling/llm/test_classify.py
similarity index 98%
rename from tests/entrypoints/llm/test_classify.py
rename to tests/entrypoints/pooling/llm/test_classify.py
index 6c0c9cd015801..ff5cea11a9182 100644
--- a/tests/entrypoints/llm/test_classify.py
+++ b/tests/entrypoints/pooling/llm/test_classify.py
@@ -6,11 +6,10 @@ import weakref
 import pytest
 import torch
 
+from tests.models.utils import softmax
 from vllm import LLM, PoolingParams
 from vllm.distributed import cleanup_dist_env_and_memory
 
-from ...models.utils import softmax
-
 MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
 
 prompts = ["The chef prepared a delicious meal."]
diff --git a/tests/entrypoints/llm/test_embedding.py b/tests/entrypoints/pooling/llm/test_embedding.py
similarity index 100%
rename from tests/entrypoints/llm/test_embedding.py
rename to tests/entrypoints/pooling/llm/test_embedding.py
diff --git a/tests/entrypoints/llm/test_encode.py b/tests/entrypoints/pooling/llm/test_encode.py
similarity index 100%
rename from tests/entrypoints/llm/test_encode.py
rename to tests/entrypoints/pooling/llm/test_encode.py
diff --git a/tests/entrypoints/llm/test_reward.py b/tests/entrypoints/pooling/llm/test_reward.py
similarity index 97%
rename from tests/entrypoints/llm/test_reward.py
rename to tests/entrypoints/pooling/llm/test_reward.py
index 2cee3c8d94e36..11d164c978a92 100644
--- a/tests/entrypoints/llm/test_reward.py
+++ b/tests/entrypoints/pooling/llm/test_reward.py
@@ -6,11 +6,10 @@ import weakref
 import pytest
 import torch
 
+from tests.models.utils import softmax
 from vllm import LLM, PoolingParams
 from vllm.distributed import cleanup_dist_env_and_memory
 
-from ...models.utils import softmax
-
 MODEL_NAME = "internlm/internlm2-1_8b-reward"
 
 prompts = ["The chef prepared a delicious meal."]
diff --git a/tests/entrypoints/llm/test_score.py b/tests/entrypoints/pooling/llm/test_score.py
similarity index 97%
rename from tests/entrypoints/llm/test_score.py
rename to tests/entrypoints/pooling/llm/test_score.py
index f715dacacb8ff..447378f989d09 100644
--- a/tests/entrypoints/llm/test_score.py
+++ b/tests/entrypoints/pooling/llm/test_score.py
@@ -6,11 +6,10 @@ import weakref
 import pytest
 import torch
 
+from tests.models.utils import softmax
 from vllm import LLM, PoolingParams
 from vllm.distributed import cleanup_dist_env_and_memory
 
-from ...models.utils import softmax
-
 MODEL_NAME = "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"
 
 
diff --git a/tests/entrypoints/pooling/openai/__init__.py b/tests/entrypoints/pooling/openai/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/entrypoints/openai/test_classification.py b/tests/entrypoints/pooling/openai/test_classification.py
similarity index 99%
rename from tests/entrypoints/openai/test_classification.py
rename to tests/entrypoints/pooling/openai/test_classification.py
index 36c96d76c2e5f..26c2c8e6af17d 100644
--- a/tests/entrypoints/openai/test_classification.py
+++ b/tests/entrypoints/pooling/openai/test_classification.py
@@ -6,10 +6,9 @@ import requests
 import torch
 import torch.nn.functional as F
 
+from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.openai.protocol import ClassificationResponse
 
-from ...utils import RemoteOpenAIServer
-
 MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"
 DTYPE = "float32"  # Use float32 to avoid NaN issue
 
diff --git a/tests/entrypoints/openai/test_embedding.py b/tests/entrypoints/pooling/openai/test_embedding.py
similarity index 98%
rename from tests/entrypoints/openai/test_embedding.py
rename to tests/entrypoints/pooling/openai/test_embedding.py
index d46ab304ba6d5..37a10e79d4fc7 100644
--- a/tests/entrypoints/openai/test_embedding.py
+++ b/tests/entrypoints/pooling/openai/test_embedding.py
@@ -11,14 +11,13 @@ import requests
 import torch
 import torch.nn.functional as F
 
+from tests.models.language.pooling.embed_utils import (
+    run_embedding_correctness_test)
+from tests.models.utils import check_embeddings_close
+from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.openai.protocol import EmbeddingResponse
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-from ...models.language.pooling.embed_utils import (
-    run_embedding_correctness_test)
-from ...models.utils import check_embeddings_close
-from ...utils import RemoteOpenAIServer
-
 MODEL_NAME = "intfloat/multilingual-e5-small"
 DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}"""  # noqa: E501
 DTYPE = "bfloat16"
diff --git a/tests/entrypoints/openai/test_embedding_dimensions.py b/tests/entrypoints/pooling/openai/test_embedding_dimensions.py
similarity index 95%
rename from tests/entrypoints/openai/test_embedding_dimensions.py
rename to tests/entrypoints/pooling/openai/test_embedding_dimensions.py
index 91e91699b92ca..3c7e88daa8ff3 100644
--- a/tests/entrypoints/openai/test_embedding_dimensions.py
+++ b/tests/entrypoints/pooling/openai/test_embedding_dimensions.py
@@ -9,13 +9,12 @@ from typing import Optional
 import openai
 import pytest
 
-from vllm.entrypoints.openai.protocol import EmbeddingResponse
-
-from ...conftest import HfRunner
-from ...models.language.pooling.embed_utils import (
+from tests.conftest import HfRunner
+from tests.models.language.pooling.embed_utils import (
     run_embedding_correctness_test)
-from ...models.utils import EmbedModelInfo
-from ...utils import RemoteOpenAIServer
+from tests.models.utils import EmbedModelInfo
+from tests.utils import RemoteOpenAIServer
+from vllm.entrypoints.openai.protocol import EmbeddingResponse
 
 MODELS = [
     EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False),
diff --git a/tests/entrypoints/openai/test_embedding_long_text.py b/tests/entrypoints/pooling/openai/test_embedding_long_text.py
similarity index 99%
rename from tests/entrypoints/openai/test_embedding_long_text.py
rename to tests/entrypoints/pooling/openai/test_embedding_long_text.py
index 86bd34abb97e0..2d3da238d245e 100644
--- a/tests/entrypoints/openai/test_embedding_long_text.py
+++ b/tests/entrypoints/pooling/openai/test_embedding_long_text.py
@@ -14,10 +14,9 @@ import openai
 import pytest
 import pytest_asyncio
 
+from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.openai.protocol import EmbeddingResponse
 
-from ...utils import RemoteOpenAIServer
-
 
 def _generate_random_text(word_count: int) -> str:
     """Generate random text with approximately the specified word count."""
diff --git a/tests/entrypoints/openai/test_pooling.py b/tests/entrypoints/pooling/openai/test_pooling.py
similarity index 99%
rename from tests/entrypoints/openai/test_pooling.py
rename to tests/entrypoints/pooling/openai/test_pooling.py
index 63f4205e0a42b..9f58955cfb40b 100644
--- a/tests/entrypoints/openai/test_pooling.py
+++ b/tests/entrypoints/pooling/openai/test_pooling.py
@@ -8,11 +8,10 @@ import pytest
 import requests
 
 from tests.models.utils import check_embeddings_close
+from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.openai.protocol import PoolingResponse
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-from ...utils import RemoteOpenAIServer
-
 MODEL_NAME = "internlm/internlm2-1_8b-reward"
 DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}"""  # noqa: E501
 
diff --git a/tests/entrypoints/openai/test_rerank.py b/tests/entrypoints/pooling/openai/test_rerank.py
similarity index 99%
rename from tests/entrypoints/openai/test_rerank.py
rename to tests/entrypoints/pooling/openai/test_rerank.py
index ce4d6c5f5d337..992cb5147ef0d 100644
--- a/tests/entrypoints/openai/test_rerank.py
+++ b/tests/entrypoints/pooling/openai/test_rerank.py
@@ -6,10 +6,9 @@ import requests
 import torch
 import torch.nn.functional as F
 
+from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.openai.protocol import RerankResponse
 
-from ...utils import RemoteOpenAIServer
-
 MODEL_NAME = "BAAI/bge-reranker-base"
 DTYPE = "bfloat16"
 
diff --git a/tests/entrypoints/openai/test_score.py b/tests/entrypoints/pooling/openai/test_score.py
similarity index 99%
rename from tests/entrypoints/openai/test_score.py
rename to tests/entrypoints/pooling/openai/test_score.py
index 4fafcfb45fa22..d676ecccbc87c 100644
--- a/tests/entrypoints/openai/test_score.py
+++ b/tests/entrypoints/pooling/openai/test_score.py
@@ -8,10 +8,9 @@ import torch
 import torch.nn.functional as F
 from torch import tensor
 
+from tests.utils import RemoteOpenAIServer
 from vllm.entrypoints.openai.protocol import ScoreResponse
 
-from ...utils import RemoteOpenAIServer
-
 MODELS = [
     {
         "name": "BAAI/bge-reranker-v2-m3",
diff --git a/tests/entrypoints/openai/test_truncation.py b/tests/entrypoints/pooling/openai/test_truncation.py
similarity index 100%
rename from tests/entrypoints/openai/test_truncation.py
rename to tests/entrypoints/pooling/openai/test_truncation.py
diff --git a/tests/entrypoints/openai/test_vision_embedding.py b/tests/entrypoints/pooling/openai/test_vision_embedding.py
similarity index 98%
rename from tests/entrypoints/openai/test_vision_embedding.py
rename to tests/entrypoints/pooling/openai/test_vision_embedding.py
index dbd403fb7a7b5..48434e36eb265 100644
--- a/tests/entrypoints/openai/test_vision_embedding.py
+++ b/tests/entrypoints/pooling/openai/test_vision_embedding.py
@@ -7,11 +7,10 @@ import pytest
 import requests
 from transformers import AutoProcessor
 
+from tests.utils import VLLM_PATH, RemoteOpenAIServer
 from vllm.entrypoints.openai.protocol import EmbeddingResponse
 from vllm.multimodal.utils import encode_image_base64, fetch_image
 
-from ...utils import VLLM_PATH, RemoteOpenAIServer
-
 MODEL_NAME = "TIGER-Lab/VLM2Vec-Full"
 MAXIMUM_IMAGES = 2