From 6f403501a085f4917e49e1714bdf44d2aabd06f9 Mon Sep 17 00:00:00 2001 From: rasmith Date: Fri, 21 Nov 2025 20:13:18 -0600 Subject: [PATCH] [CI/Build][AMD] Enable Entrypoints Integration Test (Pooling) to run without error on ROCm (#29212) Signed-off-by: Randall Smith Co-authored-by: Randall Smith --- tests/entrypoints/pooling/correctness/test_mteb_embed.py | 6 ++++++ tests/entrypoints/pooling/correctness/test_mteb_score.py | 6 ++++++ tests/entrypoints/pooling/llm/test_embedding.py | 6 ++++++ tests/entrypoints/pooling/llm/test_encode.py | 6 ++++++ tests/entrypoints/pooling/llm/test_score.py | 6 ++++++ tests/entrypoints/pooling/openai/test_embedding.py | 6 ++++++ .../entrypoints/pooling/openai/test_embedding_dimensions.py | 6 ++++++ .../entrypoints/pooling/openai/test_embedding_long_text.py | 6 ++++++ tests/entrypoints/pooling/openai/test_rerank.py | 6 ++++++ tests/entrypoints/pooling/openai/test_score.py | 6 ++++++ tests/entrypoints/pooling/openai/test_truncation.py | 6 ++++++ 11 files changed, 66 insertions(+) diff --git a/tests/entrypoints/pooling/correctness/test_mteb_embed.py b/tests/entrypoints/pooling/correctness/test_mteb_embed.py index 7f16638e51e2c..64673534fd32a 100644 --- a/tests/entrypoints/pooling/correctness/test_mteb_embed.py +++ b/tests/entrypoints/pooling/correctness/test_mteb_embed.py @@ -11,6 +11,12 @@ from tests.models.language.pooling_mteb_test.mteb_utils import ( run_mteb_embed_task, ) from tests.utils import RemoteOpenAIServer +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) os.environ["VLLM_LOGGING_LEVEL"] = "WARNING" diff --git a/tests/entrypoints/pooling/correctness/test_mteb_score.py b/tests/entrypoints/pooling/correctness/test_mteb_score.py index 1afe68b189db8..81ad0097187b0 100644 --- a/tests/entrypoints/pooling/correctness/test_mteb_score.py +++ b/tests/entrypoints/pooling/correctness/test_mteb_score.py @@ -13,6 +13,12 @@ from tests.models.language.pooling_mteb_test.mteb_utils import ( run_mteb_rerank, ) from tests.utils import RemoteOpenAIServer +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) os.environ["VLLM_LOGGING_LEVEL"] = "WARNING" diff --git a/tests/entrypoints/pooling/llm/test_embedding.py b/tests/entrypoints/pooling/llm/test_embedding.py index 5455b5f91fc09..f5eab4c29ae18 100644 --- a/tests/entrypoints/pooling/llm/test_embedding.py +++ b/tests/entrypoints/pooling/llm/test_embedding.py @@ -9,6 +9,12 @@ import torch.nn.functional as F from vllm import LLM, PoolingParams from vllm.distributed import cleanup_dist_env_and_memory +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) MODEL_NAME = "intfloat/multilingual-e5-small" diff --git a/tests/entrypoints/pooling/llm/test_encode.py b/tests/entrypoints/pooling/llm/test_encode.py index ca85d2758fce4..f86ecef2e4744 100644 --- a/tests/entrypoints/pooling/llm/test_encode.py +++ b/tests/entrypoints/pooling/llm/test_encode.py @@ -7,6 +7,12 @@ import pytest from vllm import LLM, PoolingParams from vllm.distributed import cleanup_dist_env_and_memory +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) MODEL_NAME = "intfloat/multilingual-e5-small" diff --git a/tests/entrypoints/pooling/llm/test_score.py b/tests/entrypoints/pooling/llm/test_score.py index b69c6a47c1913..ce36d61cb8476 100644 --- a/tests/entrypoints/pooling/llm/test_score.py +++ b/tests/entrypoints/pooling/llm/test_score.py @@ -9,6 +9,12 @@ import torch from tests.models.utils import softmax from vllm import LLM, PoolingParams from vllm.distributed import cleanup_dist_env_and_memory +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) MODEL_NAME = "tomaarsen/Qwen3-Reranker-0.6B-seq-cls" diff --git a/tests/entrypoints/pooling/openai/test_embedding.py b/tests/entrypoints/pooling/openai/test_embedding.py index e971b23e8f1a0..0c88d800e2f99 100644 --- a/tests/entrypoints/pooling/openai/test_embedding.py +++ b/tests/entrypoints/pooling/openai/test_embedding.py @@ -19,6 +19,7 @@ from vllm.entrypoints.openai.protocol import ( EmbeddingResponse, PoolingResponse, ) +from vllm.platforms import current_platform from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.utils.serial_utils import ( EMBED_DTYPE_TO_TORCH_DTYPE, @@ -28,6 +29,11 @@ from vllm.utils.serial_utils import ( decode_pooling_output, ) +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) + MODEL_NAME = "intfloat/multilingual-e5-small" DUMMY_CHAT_TEMPLATE = """{% for message in messages %}{{message['role'] + ': ' + message['content'] + '\\n'}}{% endfor %}""" # noqa: E501 DTYPE = "bfloat16" diff --git a/tests/entrypoints/pooling/openai/test_embedding_dimensions.py b/tests/entrypoints/pooling/openai/test_embedding_dimensions.py index ba9fb64262772..8018dac2d3ffe 100644 --- a/tests/entrypoints/pooling/openai/test_embedding_dimensions.py +++ b/tests/entrypoints/pooling/openai/test_embedding_dimensions.py @@ -12,6 +12,12 @@ from tests.models.language.pooling.embed_utils import run_embedding_correctness_ from tests.models.utils import EmbedModelInfo from tests.utils import RemoteOpenAIServer from vllm.entrypoints.openai.protocol import EmbeddingResponse +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) MODELS = [ EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False), diff --git a/tests/entrypoints/pooling/openai/test_embedding_long_text.py b/tests/entrypoints/pooling/openai/test_embedding_long_text.py index f977c81a9084e..a9ade09dad0b5 100644 --- a/tests/entrypoints/pooling/openai/test_embedding_long_text.py +++ b/tests/entrypoints/pooling/openai/test_embedding_long_text.py @@ -16,6 +16,12 @@ import pytest_asyncio from tests.utils import RemoteOpenAIServer from vllm.entrypoints.openai.protocol import EmbeddingResponse +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) def _generate_random_text(word_count: int) -> str: diff --git a/tests/entrypoints/pooling/openai/test_rerank.py b/tests/entrypoints/pooling/openai/test_rerank.py index 1d85190c12a19..5a772e22a7414 100644 --- a/tests/entrypoints/pooling/openai/test_rerank.py +++ b/tests/entrypoints/pooling/openai/test_rerank.py @@ -8,6 +8,12 @@ import torch.nn.functional as F from tests.utils import RemoteOpenAIServer from vllm.entrypoints.openai.protocol import PoolingResponse, RerankResponse +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) MODEL_NAME = "BAAI/bge-reranker-base" DTYPE = "bfloat16" diff --git a/tests/entrypoints/pooling/openai/test_score.py b/tests/entrypoints/pooling/openai/test_score.py index b8f796d47efaa..ceff9d0181825 100644 --- a/tests/entrypoints/pooling/openai/test_score.py +++ b/tests/entrypoints/pooling/openai/test_score.py @@ -10,6 +10,12 @@ from torch import tensor from tests.utils import RemoteOpenAIServer from vllm.entrypoints.openai.protocol import ScoreResponse +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) MODELS = [ {"name": "BAAI/bge-reranker-v2-m3", "is_cross_encoder": True}, diff --git a/tests/entrypoints/pooling/openai/test_truncation.py b/tests/entrypoints/pooling/openai/test_truncation.py index 6889628dc9145..0d2d385840402 100644 --- a/tests/entrypoints/pooling/openai/test_truncation.py +++ b/tests/entrypoints/pooling/openai/test_truncation.py @@ -7,6 +7,12 @@ import pytest import pytest_asyncio from tests.utils import RemoteOpenAIServer +from vllm.platforms import current_platform + +if current_platform.is_rocm(): + pytest.skip( + "Encoder self-attention is not implemented on ROCm.", allow_module_level=True + ) MODEL_NAME = "sentence-transformers/all-MiniLM-L12-v2" max_model_len = 128