Merge branch 'main' into mlm-full-lora-support

2026-05-09 02:11:25 +08:00 · 2025-12-20 15:19:28 +08:00 · 2025-12-20 15:19:28 +08:00 · e5ba472ae2
commit e5ba472ae2
parent 4c2e95ad56 ff2168bca3
5 changed files with 202 additions and 7 deletions
--- a/docker/Dockerfile.rocm
+++ b/docker/Dockerfile.rocm
@ -130,6 +130,7 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
    && uv pip install --system *.whl

 ARG COMMON_WORKDIR
+ARG BASE_IMAGE

 # Copy over the benchmark scripts as well
 COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
@ -144,4 +145,9 @@ ENV SAFETENSORS_FAST_GPU=1
 # Performance environment variable.
 ENV HIP_FORCE_DEV_KERNARG=1

+# Work around ROCm profiler event limits: raise ROCTRACER_MAX_EVENTS via a Kineto config
+RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf
+ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf"
+RUN echo "VLLM_BASE_IMAGE=${BASE_IMAGE}" >> ${COMMON_WORKDIR}/versions.txt
+
 CMD ["/bin/bash"]
--- a/docker/Dockerfile.rocm_base
+++ b/docker/Dockerfile.rocm_base
@ -1,15 +1,15 @@
-ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.1-complete
-ARG TRITON_BRANCH="57c693b6"
+ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.0-complete
+ARG TRITON_BRANCH="a272dfa8"
 ARG TRITON_REPO="https://github.com/ROCm/triton.git"
-ARG PYTORCH_BRANCH="1c57644d"
-ARG PYTORCH_VISION_BRANCH="v0.23.0"
+ARG PYTORCH_BRANCH="89075173"
 ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git"
+ARG PYTORCH_VISION_BRANCH="v0.24.1"
 ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
 ARG PYTORCH_AUDIO_BRANCH="v2.9.0"
 ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git"
 ARG FA_BRANCH="0e60e394"
 ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
-ARG AITER_BRANCH="59bd8ff2"
+ARG AITER_BRANCH="6af8b687"
 ARG AITER_REPO="https://github.com/ROCm/aiter.git"

 FROM ${BASE_IMAGE} AS base
--- a/tests/models/multimodal/pooling/conftest.py
+++ b/tests/models/multimodal/pooling/conftest.py
@ -0,0 +1,18 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""Pytest configuration for vLLM pooling tests."""
+
+import pytest
+
+from vllm.platforms import current_platform
+
+
+@pytest.fixture
+def siglip_attention_config():
+    """Return the attention config override that SigLIP tests should use, if any.
+
+    On ROCm, SigLIP tests require the FLEX_ATTENTION backend; elsewhere this is None.
+    """
+    if current_platform.is_rocm():
+        return {"backend": "FLEX_ATTENTION"}
+    return None
--- a/tests/test_attention_backend_registry.py
+++ b/tests/test_attention_backend_registry.py
@ -0,0 +1,169 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from vllm.attention.backends.abstract import (
+    AttentionBackend,
+    AttentionImpl,
+)
+from vllm.attention.backends.registry import (
+    AttentionBackendEnum,
+    MambaAttentionBackendEnum,
+    register_backend,
+)
+
+
+class CustomAttentionImpl(AttentionImpl):
+    """Stub attention implementation used only by the backend registry tests."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()  # constructor arguments are accepted but not forwarded
+
+    def forward(self, *args, **kwargs):
+        """Accept any arguments and return None; no attention is computed."""
+        pass
+
+
+class CustomAttentionBackend(AttentionBackend):
+    """Mock attention backend that the tests register under the CUSTOM enum slot."""
+
+    @staticmethod
+    def get_name():
+        return "CUSTOM"  # name checked by the registration test in this module
+
+    @staticmethod
+    def get_impl_cls():
+        return CustomAttentionImpl  # stub implementation class paired with this mock
+
+    @staticmethod
+    def get_builder_cls():
+        """Return None: this mock supplies no builder class."""
+        return None
+
+    @staticmethod
+    def get_required_kv_cache_layout():
+        """Return None: this mock declares no required KV cache layout."""
+        return None
+
+
+class CustomMambaAttentionImpl(AttentionImpl):
+    """Stub mamba attention implementation used only by the registry tests."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()  # constructor arguments are accepted but not forwarded
+
+    def forward(self, *args, **kwargs):
+        """Accept any arguments and return None; no attention is computed."""
+        pass
+
+
+class CustomMambaAttentionBackend(AttentionBackend):
+    """Mock backend that the tests register under MambaAttentionBackendEnum.CUSTOM."""
+
+    @staticmethod
+    def get_name():
+        return "CUSTOM_MAMBA"  # name checked by the mamba registration test
+
+    @staticmethod
+    def get_impl_cls():
+        return CustomMambaAttentionImpl  # stub implementation paired with this mock
+
+    @staticmethod
+    def get_builder_cls():
+        """Return None: this mock supplies no builder class."""
+        return None
+
+    @staticmethod
+    def get_required_kv_cache_layout():
+        """Return None: this mock declares no required KV cache layout."""
+        return None
+
+
+def test_custom_is_not_alias_of_any_backend():
+    # Collect the members yielded by iterating AttentionBackendEnum
+    all_backends = list(AttentionBackendEnum)
+
+    # An alias is a differently named member that is the same object as CUSTOM
+    aliases = []
+    for backend in all_backends:
+        if backend.name != "CUSTOM" and backend is AttentionBackendEnum.CUSTOM:
+            aliases.append(backend.name)
+
+    # CUSTOM must be distinct; the message (and aliases[0]) is built only on failure
+    assert len(aliases) == 0, (
+        f"BUG! CUSTOM is an alias of: {', '.join(aliases)}!\n"
+        f"CUSTOM.value = {repr(AttentionBackendEnum.CUSTOM.value)}\n"
+        f"This happens when CUSTOM has the same value as another backend.\n"
+        f"When you register to CUSTOM, you're actually registering to {aliases[0]}!\n"
+        f"All backend values:\n"
+        + "\n".join(f"  {b.name}: {repr(b.value)}" for b in all_backends)
+    )
+
+    # Looking up CUSTOM must yield a member that is itself named "CUSTOM"
+    assert AttentionBackendEnum.CUSTOM.name == "CUSTOM", (
+        f"CUSTOM.name should be 'CUSTOM', but got '{AttentionBackendEnum.CUSTOM.name}'"
+    )
+
+
+def test_register_custom_backend_with_class_path():
+    # Register this module's mock backend under CUSTOM by its dotted class path
+    register_backend(
+        backend=AttentionBackendEnum.CUSTOM,
+        class_path="tests.test_attention_backend_registry.CustomAttentionBackend",
+        is_mamba=False,
+    )
+
+    # After registration, CUSTOM should report that it has been overridden
+    assert AttentionBackendEnum.CUSTOM.is_overridden(), (
+        "CUSTOM should be overridden after registration"
+    )
+
+    # get_path() should return exactly the class path that was just registered
+    class_path = AttentionBackendEnum.CUSTOM.get_path()
+    assert class_path == "tests.test_attention_backend_registry.CustomAttentionBackend"
+
+    # The class returned by get_class() should behave like CustomAttentionBackend
+    backend_cls = AttentionBackendEnum.CUSTOM.get_class()
+    assert backend_cls.get_name() == "CUSTOM"
+    assert backend_cls.get_impl_cls() == CustomAttentionImpl
+
+
+def test_mamba_custom_is_not_alias_of_any_backend():
+    # Collect the members yielded by iterating MambaAttentionBackendEnum
+    all_backends = list(MambaAttentionBackendEnum)
+
+    # An alias is a differently named member that is the same object as CUSTOM
+    aliases = []
+    for backend in all_backends:
+        if backend.name != "CUSTOM" and backend is MambaAttentionBackendEnum.CUSTOM:
+            aliases.append(backend.name)
+
+    # CUSTOM must be a distinct member; the message is built only on failure
+    assert len(aliases) == 0, (
+        f"BUG! MambaAttentionBackendEnum.CUSTOM is an alias of: {', '.join(aliases)}!\n"
+        f"CUSTOM.value = {repr(MambaAttentionBackendEnum.CUSTOM.value)}\n"
+        f"All mamba backend values:\n"
+        + "\n".join(f"  {b.name}: {repr(b.value)}" for b in all_backends)
+    )
+
+
+def test_register_custom_mamba_backend_with_class_path():
+    # Register this module's mock mamba backend under CUSTOM by its dotted class path
+    register_backend(
+        backend=MambaAttentionBackendEnum.CUSTOM,
+        class_path="tests.test_attention_backend_registry.CustomMambaAttentionBackend",
+        is_mamba=True,
+    )
+
+    # After registration, CUSTOM should report that it has been overridden
+    assert MambaAttentionBackendEnum.CUSTOM.is_overridden()
+
+    # get_path() should return exactly the class path that was just registered
+    class_path = MambaAttentionBackendEnum.CUSTOM.get_path()
+    assert (
+        class_path
+        == "tests.test_attention_backend_registry.CustomMambaAttentionBackend"
+    )
+
+    # The class returned by get_class() should behave like CustomMambaAttentionBackend
+    backend_cls = MambaAttentionBackendEnum.CUSTOM.get_class()
+    assert backend_cls.get_name() == "CUSTOM_MAMBA"
+    assert backend_cls.get_impl_cls() == CustomMambaAttentionImpl
--- a/vllm/attention/backends/registry.py
+++ b/vllm/attention/backends/registry.py
@ -77,7 +77,8 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
    )
    CPU_ATTN = "vllm.v1.attention.backends.cpu_attn.CPUAttentionBackend"
    # Placeholder for third-party/custom backends - must be registered before use
-    CUSTOM = ""
+    # Use None, not "", so CUSTOM is not an alias of another backend whose value is ""
+    CUSTOM = None

    def get_path(self, include_classname: bool = True) -> str:
        """Get the class path for this backend (respects overrides).
@ -139,7 +140,8 @@ class MambaAttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
    LINEAR = "vllm.v1.attention.backends.linear_attn.LinearAttentionBackend"
    GDN_ATTN = "vllm.v1.attention.backends.gdn_attn.GDNAttentionBackend"
    # Placeholder for third-party/custom backends - must be registered before use
-    CUSTOM = ""
+    # Use None, not "", so CUSTOM is not an alias of another backend whose value is ""
+    CUSTOM = None

    def get_path(self, include_classname: bool = True) -> str:
        """Get the class path for this backend (respects overrides).