mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-05 04:49:10 +08:00
Merge branch 'main' into mlm-full-lora-support
This commit is contained in:
commit
e5ba472ae2
@ -130,6 +130,7 @@ RUN --mount=type=bind,from=export_vllm,src=/,target=/install \
|
|||||||
&& uv pip install --system *.whl
|
&& uv pip install --system *.whl
|
||||||
|
|
||||||
ARG COMMON_WORKDIR
|
ARG COMMON_WORKDIR
|
||||||
|
ARG BASE_IMAGE
|
||||||
|
|
||||||
# Copy over the benchmark scripts as well
|
# Copy over the benchmark scripts as well
|
||||||
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
|
COPY --from=export_vllm /benchmarks ${COMMON_WORKDIR}/vllm/benchmarks
|
||||||
@ -144,4 +145,9 @@ ENV SAFETENSORS_FAST_GPU=1
|
|||||||
# Performance environment variable.
|
# Performance environment variable.
|
||||||
ENV HIP_FORCE_DEV_KERNARG=1
|
ENV HIP_FORCE_DEV_KERNARG=1
|
||||||
|
|
||||||
|
# Workaround for ROCm profiler limits
|
||||||
|
RUN echo "ROCTRACER_MAX_EVENTS=10000000" > ${COMMON_WORKDIR}/libkineto.conf
|
||||||
|
ENV KINETO_CONFIG="${COMMON_WORKDIR}/libkineto.conf"
|
||||||
|
RUN echo "VLLM_BASE_IMAGE=${BASE_IMAGE}" >> ${COMMON_WORKDIR}/versions.txt
|
||||||
|
|
||||||
CMD ["/bin/bash"]
|
CMD ["/bin/bash"]
|
||||||
|
|||||||
@ -1,15 +1,15 @@
|
|||||||
ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.1-complete
|
ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.0-complete
|
||||||
ARG TRITON_BRANCH="57c693b6"
|
ARG TRITON_BRANCH="a272dfa8"
|
||||||
ARG TRITON_REPO="https://github.com/ROCm/triton.git"
|
ARG TRITON_REPO="https://github.com/ROCm/triton.git"
|
||||||
ARG PYTORCH_BRANCH="1c57644d"
|
ARG PYTORCH_BRANCH="89075173"
|
||||||
ARG PYTORCH_VISION_BRANCH="v0.23.0"
|
|
||||||
ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git"
|
ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git"
|
||||||
|
ARG PYTORCH_VISION_BRANCH="v0.24.1"
|
||||||
ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
|
ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git"
|
||||||
ARG PYTORCH_AUDIO_BRANCH="v2.9.0"
|
ARG PYTORCH_AUDIO_BRANCH="v2.9.0"
|
||||||
ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git"
|
ARG PYTORCH_AUDIO_REPO="https://github.com/pytorch/audio.git"
|
||||||
ARG FA_BRANCH="0e60e394"
|
ARG FA_BRANCH="0e60e394"
|
||||||
ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
|
ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git"
|
||||||
ARG AITER_BRANCH="59bd8ff2"
|
ARG AITER_BRANCH="6af8b687"
|
||||||
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
|
ARG AITER_REPO="https://github.com/ROCm/aiter.git"
|
||||||
|
|
||||||
FROM ${BASE_IMAGE} AS base
|
FROM ${BASE_IMAGE} AS base
|
||||||
|
|||||||
18
tests/models/multimodal/pooling/conftest.py
Normal file
18
tests/models/multimodal/pooling/conftest.py
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
"""Pytest configuration for vLLM pooling tests."""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from vllm.platforms import current_platform
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def siglip_attention_config():
|
||||||
|
"""Return attention config for SigLIP tests on ROCm.
|
||||||
|
|
||||||
|
On ROCm, SigLIP tests require the FLEX_ATTENTION backend; on any other platform this fixture returns None.
|
||||||
|
"""
|
||||||
|
if current_platform.is_rocm():
|
||||||
|
return {"backend": "FLEX_ATTENTION"}
|
||||||
|
return None
|
||||||
169
tests/test_attention_backend_registry.py
Normal file
169
tests/test_attention_backend_registry.py
Normal file
@ -0,0 +1,169 @@
|
|||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
|
from vllm.attention.backends.abstract import (
|
||||||
|
AttentionBackend,
|
||||||
|
AttentionImpl,
|
||||||
|
)
|
||||||
|
from vllm.attention.backends.registry import (
|
||||||
|
AttentionBackendEnum,
|
||||||
|
MambaAttentionBackendEnum,
|
||||||
|
register_backend,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CustomAttentionImpl(AttentionImpl):
|
||||||
|
"""Mock custom attention implementation for testing."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
def forward(self, *args, **kwargs):
|
||||||
|
"""Mock forward pass."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class CustomAttentionBackend(AttentionBackend):
|
||||||
|
"""Mock custom attention backend for testing."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_name():
|
||||||
|
return "CUSTOM"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_impl_cls():
|
||||||
|
return CustomAttentionImpl
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_builder_cls():
|
||||||
|
"""Mock builder class."""
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_required_kv_cache_layout():
|
||||||
|
"""Mock KV cache layout."""
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
class CustomMambaAttentionImpl(AttentionImpl):
|
||||||
|
"""Mock custom mamba attention implementation for testing."""
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__()
|
||||||
|
|
||||||
|
def forward(self, *args, **kwargs):
|
||||||
|
"""Mock forward pass."""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class CustomMambaAttentionBackend(AttentionBackend):
|
||||||
|
"""Mock custom mamba attention backend for testing."""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_name():
|
||||||
|
return "CUSTOM_MAMBA"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_impl_cls():
|
||||||
|
return CustomMambaAttentionImpl
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_builder_cls():
|
||||||
|
"""Mock builder class."""
|
||||||
|
return None
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_required_kv_cache_layout():
|
||||||
|
"""Mock KV cache layout."""
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def test_custom_is_not_alias_of_any_backend():
|
||||||
|
# Get all members of AttentionBackendEnum
|
||||||
|
all_backends = list(AttentionBackendEnum)
|
||||||
|
|
||||||
|
# Find any aliases of CUSTOM
|
||||||
|
aliases = []
|
||||||
|
for backend in all_backends:
|
||||||
|
if backend.name != "CUSTOM" and backend is AttentionBackendEnum.CUSTOM:
|
||||||
|
aliases.append(backend.name)
|
||||||
|
|
||||||
|
# CUSTOM should not be an alias of any other backend
|
||||||
|
assert len(aliases) == 0, (
|
||||||
|
f"BUG! CUSTOM is an alias of: {', '.join(aliases)}!\n"
|
||||||
|
f"CUSTOM.value = {repr(AttentionBackendEnum.CUSTOM.value)}\n"
|
||||||
|
f"This happens when CUSTOM has the same value as another backend.\n"
|
||||||
|
f"When you register to CUSTOM, you're actually registering to {aliases[0]}!\n"
|
||||||
|
f"All backend values:\n"
|
||||||
|
+ "\n".join(f" {b.name}: {repr(b.value)}" for b in all_backends)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify CUSTOM has its own unique identity
|
||||||
|
assert AttentionBackendEnum.CUSTOM.name == "CUSTOM", (
|
||||||
|
f"CUSTOM.name should be 'CUSTOM', but got '{AttentionBackendEnum.CUSTOM.name}'"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_register_custom_backend_with_class_path():
|
||||||
|
# Register with explicit class path
|
||||||
|
register_backend(
|
||||||
|
backend=AttentionBackendEnum.CUSTOM,
|
||||||
|
class_path="tests.test_attention_backend_registry.CustomAttentionBackend",
|
||||||
|
is_mamba=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check that CUSTOM backend is registered
|
||||||
|
assert AttentionBackendEnum.CUSTOM.is_overridden(), (
|
||||||
|
"CUSTOM should be overridden after registration"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get the registered class path
|
||||||
|
class_path = AttentionBackendEnum.CUSTOM.get_path()
|
||||||
|
assert class_path == "tests.test_attention_backend_registry.CustomAttentionBackend"
|
||||||
|
|
||||||
|
# Get the backend class
|
||||||
|
backend_cls = AttentionBackendEnum.CUSTOM.get_class()
|
||||||
|
assert backend_cls.get_name() == "CUSTOM"
|
||||||
|
assert backend_cls.get_impl_cls() == CustomAttentionImpl
|
||||||
|
|
||||||
|
|
||||||
|
def test_mamba_custom_is_not_alias_of_any_backend():
|
||||||
|
# Get all mamba backends
|
||||||
|
all_backends = list(MambaAttentionBackendEnum)
|
||||||
|
|
||||||
|
# Find any aliases of CUSTOM
|
||||||
|
aliases = []
|
||||||
|
for backend in all_backends:
|
||||||
|
if backend.name != "CUSTOM" and backend is MambaAttentionBackendEnum.CUSTOM:
|
||||||
|
aliases.append(backend.name)
|
||||||
|
|
||||||
|
# CUSTOM should not be an alias of any other backend
|
||||||
|
assert len(aliases) == 0, (
|
||||||
|
f"BUG! MambaAttentionBackendEnum.CUSTOM is an alias of: {', '.join(aliases)}!\n"
|
||||||
|
f"CUSTOM.value = {repr(MambaAttentionBackendEnum.CUSTOM.value)}\n"
|
||||||
|
f"All mamba backend values:\n"
|
||||||
|
+ "\n".join(f" {b.name}: {repr(b.value)}" for b in all_backends)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_register_custom_mamba_backend_with_class_path():
|
||||||
|
# Register with explicit class path
|
||||||
|
register_backend(
|
||||||
|
backend=MambaAttentionBackendEnum.CUSTOM,
|
||||||
|
class_path="tests.test_attention_backend_registry.CustomMambaAttentionBackend",
|
||||||
|
is_mamba=True,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Check that the backend is registered
|
||||||
|
assert MambaAttentionBackendEnum.CUSTOM.is_overridden()
|
||||||
|
|
||||||
|
# Get the registered class path
|
||||||
|
class_path = MambaAttentionBackendEnum.CUSTOM.get_path()
|
||||||
|
assert (
|
||||||
|
class_path
|
||||||
|
== "tests.test_attention_backend_registry.CustomMambaAttentionBackend"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Get the backend class
|
||||||
|
backend_cls = MambaAttentionBackendEnum.CUSTOM.get_class()
|
||||||
|
assert backend_cls.get_name() == "CUSTOM_MAMBA"
|
||||||
|
assert backend_cls.get_impl_cls() == CustomMambaAttentionImpl
|
||||||
@ -77,7 +77,8 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
|
|||||||
)
|
)
|
||||||
CPU_ATTN = "vllm.v1.attention.backends.cpu_attn.CPUAttentionBackend"
|
CPU_ATTN = "vllm.v1.attention.backends.cpu_attn.CPUAttentionBackend"
|
||||||
# Placeholder for third-party/custom backends - must be registered before use
|
# Placeholder for third-party/custom backends - must be registered before use
|
||||||
CUSTOM = ""
|
# Set to None rather than "" so that CUSTOM never becomes an Enum alias of another backend whose value is also an empty string
|
||||||
|
CUSTOM = None
|
||||||
|
|
||||||
def get_path(self, include_classname: bool = True) -> str:
|
def get_path(self, include_classname: bool = True) -> str:
|
||||||
"""Get the class path for this backend (respects overrides).
|
"""Get the class path for this backend (respects overrides).
|
||||||
@ -139,7 +140,8 @@ class MambaAttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
|
|||||||
LINEAR = "vllm.v1.attention.backends.linear_attn.LinearAttentionBackend"
|
LINEAR = "vllm.v1.attention.backends.linear_attn.LinearAttentionBackend"
|
||||||
GDN_ATTN = "vllm.v1.attention.backends.gdn_attn.GDNAttentionBackend"
|
GDN_ATTN = "vllm.v1.attention.backends.gdn_attn.GDNAttentionBackend"
|
||||||
# Placeholder for third-party/custom backends - must be registered before use
|
# Placeholder for third-party/custom backends - must be registered before use
|
||||||
CUSTOM = ""
|
# Set to None rather than "" so that CUSTOM never becomes an Enum alias of another backend whose value is also an empty string
|
||||||
|
CUSTOM = None
|
||||||
|
|
||||||
def get_path(self, include_classname: bool = True) -> str:
|
def get_path(self, include_classname: bool = True) -> str:
|
||||||
"""Get the class path for this backend (respects overrides).
|
"""Get the class path for this backend (respects overrides).
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user