From d52c5096d7305abc7f266026cb042121ff5bccda Mon Sep 17 00:00:00 2001 From: zejunchen-zejun Date: Sat, 20 Dec 2025 09:03:35 +0800 Subject: [PATCH 1/3] [Bugfix] fix the alias bug of AttentionBackendEnum when register CUSTOM attention backend to vllm (#30869) Signed-off-by: zejunchen-zejun --- tests/test_attention_backend_registry.py | 169 +++++++++++++++++++++++ vllm/attention/backends/registry.py | 6 +- 2 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 tests/test_attention_backend_registry.py diff --git a/tests/test_attention_backend_registry.py b/tests/test_attention_backend_registry.py new file mode 100644 index 0000000000000..7b90b949aa457 --- /dev/null +++ b/tests/test_attention_backend_registry.py @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from vllm.attention.backends.abstract import ( + AttentionBackend, + AttentionImpl, +) +from vllm.attention.backends.registry import ( + AttentionBackendEnum, + MambaAttentionBackendEnum, + register_backend, +) + + +class CustomAttentionImpl(AttentionImpl): + """Mock custom attention implementation for testing.""" + + def __init__(self, *args, **kwargs): + super().__init__() + + def forward(self, *args, **kwargs): + """Mock forward pass.""" + pass + + +class CustomAttentionBackend(AttentionBackend): + """Mock custom attention backend for testing.""" + + @staticmethod + def get_name(): + return "CUSTOM" + + @staticmethod + def get_impl_cls(): + return CustomAttentionImpl + + @staticmethod + def get_builder_cls(): + """Mock builder class.""" + return None + + @staticmethod + def get_required_kv_cache_layout(): + """Mock KV cache layout.""" + return None + + +class CustomMambaAttentionImpl(AttentionImpl): + """Mock custom mamba attention implementation for testing.""" + + def __init__(self, *args, **kwargs): + super().__init__() + + def forward(self, *args, **kwargs): + """Mock forward pass.""" + pass + + +class 
CustomMambaAttentionBackend(AttentionBackend): + """Mock custom mamba attention backend for testing.""" + + @staticmethod + def get_name(): + return "CUSTOM_MAMBA" + + @staticmethod + def get_impl_cls(): + return CustomMambaAttentionImpl + + @staticmethod + def get_builder_cls(): + """Mock builder class.""" + return None + + @staticmethod + def get_required_kv_cache_layout(): + """Mock KV cache layout.""" + return None + + +def test_custom_is_not_alias_of_any_backend(): + # Get all members of AttentionBackendEnum + all_backends = list(AttentionBackendEnum) + + # Find any aliases of CUSTOM + aliases = [] + for backend in all_backends: + if backend.name != "CUSTOM" and backend is AttentionBackendEnum.CUSTOM: + aliases.append(backend.name) + + # CUSTOM should not be an alias of any other backend + assert len(aliases) == 0, ( + f"BUG! CUSTOM is an alias of: {', '.join(aliases)}!\n" + f"CUSTOM.value = {repr(AttentionBackendEnum.CUSTOM.value)}\n" + f"This happens when CUSTOM has the same value as another backend.\n" + f"When you register to CUSTOM, you're actually registering to {aliases[0]}!\n" + f"All backend values:\n" + + "\n".join(f" {b.name}: {repr(b.value)}" for b in all_backends) + ) + + # Verify CUSTOM has its own unique identity + assert AttentionBackendEnum.CUSTOM.name == "CUSTOM", ( + f"CUSTOM.name should be 'CUSTOM', but got '{AttentionBackendEnum.CUSTOM.name}'" + ) + + +def test_register_custom_backend_with_class_path(): + # Register with explicit class path + register_backend( + backend=AttentionBackendEnum.CUSTOM, + class_path="tests.test_attention_backend_registry.CustomAttentionBackend", + is_mamba=False, + ) + + # Check that CUSTOM backend is registered + assert AttentionBackendEnum.CUSTOM.is_overridden(), ( + "CUSTOM should be overridden after registration" + ) + + # Get the registered class path + class_path = AttentionBackendEnum.CUSTOM.get_path() + assert class_path == "tests.test_attention_backend_registry.CustomAttentionBackend" + + # Get the 
backend class + backend_cls = AttentionBackendEnum.CUSTOM.get_class() + assert backend_cls.get_name() == "CUSTOM" + assert backend_cls.get_impl_cls() == CustomAttentionImpl + + +def test_mamba_custom_is_not_alias_of_any_backend(): + # Get all mamba backends + all_backends = list(MambaAttentionBackendEnum) + + # Find any aliases of CUSTOM + aliases = [] + for backend in all_backends: + if backend.name != "CUSTOM" and backend is MambaAttentionBackendEnum.CUSTOM: + aliases.append(backend.name) + + # CUSTOM should not be an alias of any other backend + assert len(aliases) == 0, ( + f"BUG! MambaAttentionBackendEnum.CUSTOM is an alias of: {', '.join(aliases)}!\n" + f"CUSTOM.value = {repr(MambaAttentionBackendEnum.CUSTOM.value)}\n" + f"All mamba backend values:\n" + + "\n".join(f" {b.name}: {repr(b.value)}" for b in all_backends) + ) + + +def test_register_custom_mamba_backend_with_class_path(): + # Register with explicit class path + register_backend( + backend=MambaAttentionBackendEnum.CUSTOM, + class_path="tests.test_attention_backend_registry.CustomMambaAttentionBackend", + is_mamba=True, + ) + + # Check that the backend is registered + assert MambaAttentionBackendEnum.CUSTOM.is_overridden() + + # Get the registered class path + class_path = MambaAttentionBackendEnum.CUSTOM.get_path() + assert ( + class_path + == "tests.test_attention_backend_registry.CustomMambaAttentionBackend" + ) + + # Get the backend class + backend_cls = MambaAttentionBackendEnum.CUSTOM.get_class() + assert backend_cls.get_name() == "CUSTOM_MAMBA" + assert backend_cls.get_impl_cls() == CustomMambaAttentionImpl diff --git a/vllm/attention/backends/registry.py b/vllm/attention/backends/registry.py index ed0021db204ac..416b996df9f22 100644 --- a/vllm/attention/backends/registry.py +++ b/vllm/attention/backends/registry.py @@ -77,7 +77,8 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta): ) CPU_ATTN = "vllm.v1.attention.backends.cpu_attn.CPUAttentionBackend" # Placeholder for 
third-party/custom backends - must be registered before use - CUSTOM = "" + # set to None to avoid alias with other backend, whose value is an empty string + CUSTOM = None def get_path(self, include_classname: bool = True) -> str: """Get the class path for this backend (respects overrides). @@ -139,7 +140,8 @@ class MambaAttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta): LINEAR = "vllm.v1.attention.backends.linear_attn.LinearAttentionBackend" GDN_ATTN = "vllm.v1.attention.backends.gdn_attn.GDNAttentionBackend" # Placeholder for third-party/custom backends - must be registered before use - CUSTOM = "" + # set to None to avoid alias with other backend, whose value is an empty string + CUSTOM = None def get_path(self, include_classname: bool = True) -> str: """Get the class path for this backend (respects overrides). From 0be149524c0e21d8a86496918b24a919ed7eb3aa Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Date: Fri, 19 Dec 2025 21:19:12 -0600 Subject: [PATCH 2/3] [ROCm][CI/Build] Update ROCm dockerfiles (#30991) Signed-off-by: Gregory Shtrasberg --- docker/Dockerfile.rocm | 6 ++++++ docker/Dockerfile.rocm_base | 10 +++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 1b6bdabc7a539..4c09808a14333 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -130,6 +130,7 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \ && uv pip install --system *.whl ARG COMMON_WORKDIR +ARG BASE_IMAGE # Copy over the benchmark scripts as well COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks @@ -144,4 +145,9 @@ ENV SAFETENSORS_FAST_GPU=1 # Performance environment variable. 
ENV HIP_FORCE_DEV_KERNARG=1 +# Workaround for ROCm profiler limits +RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf +ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf" +RUN echo "VLLM_BASE_IMAGE=${BASE_IMAGE}" >> ${COMMON_WORKDIR}/versions.txt + CMD ["/bin/bash"] diff --git a/docker/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base index a57ee728d9243..ac63231094462 100644 --- a/docker/Dockerfile.rocm_base +++ b/docker/Dockerfile.rocm_base @@ -1,15 +1,15 @@ -ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.1-complete -ARG TRITON_BRANCH="57c693b6" +ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.0-complete +ARG TRITON_BRANCH="a272dfa8" ARG TRITON_REPO="https://github.com/ROCm/triton.git" -ARG PYTORCH_BRANCH="1c57644d" -ARG PYTORCH_VISION_BRANCH="v0.23.0" +ARG PYTORCH_BRANCH="89075173" ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git" +ARG PYTORCH_VISION_BRANCH="v0.24.1" ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git" ARG PYTORCH_AUDIO_BRANCH="v2.9.0" ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git" ARG FA_BRANCH="0e60e394" ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git" -ARG AITER_BRANCH="59bd8ff2" +ARG AITER_BRANCH="6af8b687" ARG AITER_REPO="https://github.com/ROCm/aiter.git" FROM ${BASE_IMAGE} AS base From ff2168bca3a195b835c64a5c9012d7b6a9f34e61 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Fri, 19 Dec 2025 22:46:15 -0500 Subject: [PATCH 3/3] [CI] Fix `fixture 'siglip_attention_config' not found` (#31053) Signed-off-by: Lucas Wilkinson --- tests/models/multimodal/pooling/conftest.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/models/multimodal/pooling/conftest.py diff --git a/tests/models/multimodal/pooling/conftest.py b/tests/models/multimodal/pooling/conftest.py new file mode 100644 index 0000000000000..401bc39b4b109 --- /dev/null +++ b/tests/models/multimodal/pooling/conftest.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: Apache-2.0 +# 
SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Pytest configuration for vLLM pooling tests.""" + +import pytest + +from vllm.platforms import current_platform + + +@pytest.fixture +def siglip_attention_config(): + """Return attention config for SigLIP tests on ROCm. + + On ROCm, SigLIP tests require FLEX_ATTENTION backend. + """ + if current_platform.is_rocm(): + return {"backend": "FLEX_ATTENTION"} + return None