[CI] Split pooling from entrypoints Test (#24632)

Signed-off-by: wang.yuqi <noooop@126.com>
wang.yuqi 2025-09-11 16:53:09 +08:00 committed by GitHub
parent ed5ae4aace
commit a8b0361c92
21 changed files with 32 additions and 30 deletions

View File

@@ -113,7 +113,7 @@ steps:
   - tests/entrypoints/
   commands:
   - pytest -v -s entrypoints/openai/tool_parsers
-  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py
+  - pytest -v -s entrypoints/ --ignore=entrypoints/llm --ignore=entrypoints/openai --ignore=entrypoints/offline_mode --ignore=entrypoints/test_chat_utils.py --ignore=entrypoints/pooling

 - label: Entrypoints Integration Test (LLM) # 30min
   timeout_in_minutes: 40
@@ -148,6 +148,19 @@ steps:
   - pytest -v -s entrypoints/openai --ignore=entrypoints/openai/test_chat_with_tool_reasoning.py --ignore=entrypoints/openai/test_oot_registration.py --ignore=entrypoints/openai/test_tensorizer_entrypoint.py --ignore=entrypoints/openai/correctness/ --ignore=entrypoints/openai/test_collective_rpc.py --ignore=entrypoints/openai/tool_parsers/
   - pytest -v -s entrypoints/test_chat_utils.py

+- label: Entrypoints Integration Test (Pooling)
+  timeout_in_minutes: 50
+  mirror_hardwares: [amdexperimental]
+  working_dir: "/vllm-workspace/tests"
+  fast_check: true
+  torch_nightly: true
+  source_file_dependencies:
+  - vllm/
+  - tests/entrypoints/pooling
+  commands:
+  - export VLLM_WORKER_MULTIPROC_METHOD=spawn
+  - pytest -v -s entrypoints/pooling
+
 - label: Distributed Tests (4 GPUs) # 35min
   timeout_in_minutes: 50
   mirror_hardwares: [amdexperimental]
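
For anyone reproducing the new step outside Buildkite, its two commands boil down to the following minimal Python sketch (assuming it is run from the tests/ directory of a vLLM checkout, per the working_dir above):

# Local reproduction of the "Entrypoints Integration Test (Pooling)" step.
import os
import sys

import pytest

# The CI step forces the spawn start method for vLLM worker processes.
os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

# Equivalent to: pytest -v -s entrypoints/pooling
sys.exit(pytest.main(["-v", "-s", "entrypoints/pooling"]))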

View File

@@ -6,11 +6,10 @@ import weakref
 import pytest
 import torch
+from tests.models.utils import softmax

 from vllm import LLM, PoolingParams
 from vllm.distributed import cleanup_dist_env_and_memory

-from ...models.utils import softmax
-
 MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"

 prompts = ["The chef prepared a delicious meal."]
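
The import rewrite in this file, repeated in every moved test below, is the substance of the change: once the files live one package level deeper under tests/entrypoints/pooling/, a three-dot relative import no longer reaches the tests package. A sketch of the resolution, with old and new paths inferred from the CI changes (illustrative, not verbatim from the commit):

# Old location: tests/entrypoints/llm/test_classify.py
#   from ...models.utils import softmax   # "..." -> tests: resolves
# New location: tests/entrypoints/pooling/llm/test_classify.py
#   from ...models.utils import softmax   # "..." -> tests.entrypoints: ImportError
# An absolute import works at any directory depth, hence:
from tests.models.utils import softmax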

View File

@@ -6,11 +6,10 @@ import weakref
 import pytest
 import torch
+from tests.models.utils import softmax

 from vllm import LLM, PoolingParams
 from vllm.distributed import cleanup_dist_env_and_memory

-from ...models.utils import softmax
-
 MODEL_NAME = "internlm/internlm2-1_8b-reward"

 prompts = ["The chef prepared a delicious meal."]

View File

@@ -6,11 +6,10 @@ import weakref
 import pytest
 import torch
+from tests.models.utils import softmax

 from vllm import LLM, PoolingParams
 from vllm.distributed import cleanup_dist_env_and_memory

-from ...models.utils import softmax
-
 MODEL_NAME = "tomaarsen/Qwen3-Reranker-0.6B-seq-cls"

View File

@@ -6,10 +6,9 @@ import requests
 import torch
 import torch.nn.functional as F
+from tests.utils import RemoteOpenAIServer

 from vllm.entrypoints.openai.protocol import ClassificationResponse

-from ...utils import RemoteOpenAIServer
-
 MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"

 DTYPE = "float32"  # Use float32 to avoid NaN issue
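
For context, a hedged sketch of the round trip this file exercises; the endpoint path and payload shape are assumptions based on the imports above, not code copied from the test:

import requests

from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.openai.protocol import ClassificationResponse

MODEL_NAME = "jason9693/Qwen2.5-1.5B-apeach"

# Start a vLLM OpenAI-compatible server and validate one classify call.
with RemoteOpenAIServer(MODEL_NAME, ["--dtype", "float32"]) as server:
    resp = requests.post(
        server.url_for("classify"),
        json={"model": MODEL_NAME,
              "input": ["The chef prepared a delicious meal."]},
    )
    result = ClassificationResponse.model_validate(resp.json())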

View File

@@ -11,14 +11,13 @@ import requests
 import torch
 import torch.nn.functional as F
+from tests.models.language.pooling.embed_utils import (
+    run_embedding_correctness_test)
+from tests.models.utils import check_embeddings_close
+from tests.utils import RemoteOpenAIServer

 from vllm.entrypoints.openai.protocol import EmbeddingResponse
 from vllm.transformers_utils.tokenizer import get_tokenizer

-from ...models.language.pooling.embed_utils import (
-    run_embedding_correctness_test)
-from ...models.utils import check_embeddings_close
-from ...utils import RemoteOpenAIServer
-
 MODEL_NAME = "intfloat/multilingual-e5-small"
 DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}"""  # noqa: E501
 DTYPE = "bfloat16"
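
The embedding tests follow the same server-fixture pattern; a hedged sketch of one request (the /v1/embeddings payload follows the OpenAI schema that vLLM mirrors):

import requests

from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.openai.protocol import EmbeddingResponse

MODEL_NAME = "intfloat/multilingual-e5-small"

with RemoteOpenAIServer(MODEL_NAME, ["--dtype", "bfloat16"]) as server:
    resp = requests.post(
        server.url_for("v1/embeddings"),
        json={"model": MODEL_NAME,
              "input": ["The chef prepared a delicious meal."]},
    )
    embeddings = EmbeddingResponse.model_validate(resp.json())
    # One input prompt should yield one embedding vector.
    assert len(embeddings.data) == 1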

View File

@@ -9,13 +9,12 @@ from typing import Optional
 import openai
 import pytest

-from vllm.entrypoints.openai.protocol import EmbeddingResponse
-
-from ...conftest import HfRunner
-from ...models.language.pooling.embed_utils import (
+from tests.conftest import HfRunner
+from tests.models.language.pooling.embed_utils import (
     run_embedding_correctness_test)
-from ...models.utils import EmbedModelInfo
-from ...utils import RemoteOpenAIServer
+from tests.models.utils import EmbedModelInfo
+from tests.utils import RemoteOpenAIServer
+from vllm.entrypoints.openai.protocol import EmbeddingResponse

 MODELS = [
     EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False),

View File

@@ -14,10 +14,9 @@ import openai
 import pytest
 import pytest_asyncio
+from tests.utils import RemoteOpenAIServer

 from vllm.entrypoints.openai.protocol import EmbeddingResponse

-from ...utils import RemoteOpenAIServer
-

 def _generate_random_text(word_count: int) -> str:
     """Generate random text with approximately the specified word count."""

View File

@@ -8,11 +8,10 @@ import pytest
 import requests

+from tests.models.utils import check_embeddings_close
+from tests.utils import RemoteOpenAIServer

 from vllm.entrypoints.openai.protocol import PoolingResponse
 from vllm.transformers_utils.tokenizer import get_tokenizer

-from ...models.utils import check_embeddings_close
-from ...utils import RemoteOpenAIServer
-
 MODEL_NAME = "internlm/internlm2-1_8b-reward"

 DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}"""  # noqa: E501

View File

@@ -6,10 +6,9 @@ import requests
 import torch
 import torch.nn.functional as F
+from tests.utils import RemoteOpenAIServer

 from vllm.entrypoints.openai.protocol import RerankResponse

-from ...utils import RemoteOpenAIServer
-
 MODEL_NAME = "BAAI/bge-reranker-base"

 DTYPE = "bfloat16"
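
The rerank variant differs mainly in its payload; a hedged sketch (the query/documents shape is assumed from RerankResponse and vLLM's rerank API, not copied from the test):

import requests

from tests.utils import RemoteOpenAIServer
from vllm.entrypoints.openai.protocol import RerankResponse

MODEL_NAME = "BAAI/bge-reranker-base"

with RemoteOpenAIServer(MODEL_NAME, ["--dtype", "bfloat16"]) as server:
    resp = requests.post(
        server.url_for("rerank"),
        json={
            "model": MODEL_NAME,
            "query": "What is the capital of France?",
            "documents": ["Paris is the capital of France.",
                          "Berlin is the capital of Germany."],
        },
    )
    scores = RerankResponse.model_validate(resp.json())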

View File

@@ -8,10 +8,9 @@ import torch
 import torch.nn.functional as F
 from torch import tensor
+from tests.utils import RemoteOpenAIServer

 from vllm.entrypoints.openai.protocol import ScoreResponse

-from ...utils import RemoteOpenAIServer
-
 MODELS = [
     {
         "name": "BAAI/bge-reranker-v2-m3",

View File

@@ -7,11 +7,10 @@ import pytest
 import requests
 from transformers import AutoProcessor
+from tests.utils import VLLM_PATH, RemoteOpenAIServer

 from vllm.entrypoints.openai.protocol import EmbeddingResponse
 from vllm.multimodal.utils import encode_image_base64, fetch_image

-from ...utils import VLLM_PATH, RemoteOpenAIServer
-
 MODEL_NAME = "TIGER-Lab/VLM2Vec-Full"

 MAXIMUM_IMAGES = 2
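
A short hedged fragment showing how the multimodal imports above fit together before a request is built (the image URL is hypothetical, for illustration only):

from vllm.multimodal.utils import encode_image_base64, fetch_image

# Fetch a test image and base64-encode it for the embeddings payload.
image = fetch_image("https://example.com/sample.jpg")  # hypothetical URL
image_b64 = encode_image_base64(image)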