diff --git a/tests/kernels/mamba/test_mamba_ssm_ssd.py b/tests/kernels/mamba/test_mamba_ssm_ssd.py index d2b893ffff7c3..2c554baaff76c 100644 --- a/tests/kernels/mamba/test_mamba_ssm_ssd.py +++ b/tests/kernels/mamba/test_mamba_ssm_ssd.py @@ -9,7 +9,7 @@ from einops import rearrange, repeat from vllm.model_executor.layers.mamba.ops.ssd_combined import ( mamba_chunk_scan_combined) from vllm.platforms import current_platform -from vllm.v1.attention.backends.mamba_attn import ( +from vllm.v1.attention.backends.mamba2_attn import ( _query_start_loc_to_chunk_indices_offsets) # Added by the IBM Team, 2024 diff --git a/tests/v1/attention/test_mamba_selectors.py b/tests/v1/attention/test_mamba_selectors.py index 8eaafc5e16816..4245b50c71310 100644 --- a/tests/v1/attention/test_mamba_selectors.py +++ b/tests/v1/attention/test_mamba_selectors.py @@ -4,7 +4,7 @@ import pytest -from vllm.v1.attention.backends.mamba_attn import Mamba2AttentionBackend +from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionBackend from vllm.v1.attention.backends.mamba_selectors import get_mamba_attn_backend diff --git a/vllm/model_executor/layers/mamba/mamba2_metadata.py b/vllm/model_executor/layers/mamba/mamba2_metadata.py index 0a836fd17533a..3256ac034aa11 100644 --- a/vllm/model_executor/layers/mamba/mamba2_metadata.py +++ b/vllm/model_executor/layers/mamba/mamba2_metadata.py @@ -11,7 +11,7 @@ from vllm.attention.backends.placeholder_attn import ( PlaceholderAttentionMetadata) from vllm.attention.backends.utils import PAD_SLOT_ID from vllm.platforms import current_platform -from vllm.v1.attention.backends.mamba_attn import ( +from vllm.v1.attention.backends.mamba2_attn import ( Mamba2AttentionMetadata, _query_start_loc_to_chunk_indices_offsets) diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py index 10a5618c227e8..6bf0c18ebdb47 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer2.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py @@ -36,7 +36,7 @@ from vllm.model_executor.models.mamba_cache import MambaCacheParams from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op -from vllm.v1.attention.backends.mamba_attn import Mamba2AttentionMetadata +from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionMetadata # Added by the IBM Team, 2024 diff --git a/vllm/v1/attention/backends/mamba_attn.py b/vllm/v1/attention/backends/mamba2_attn.py similarity index 100% rename from vllm/v1/attention/backends/mamba_attn.py rename to vllm/v1/attention/backends/mamba2_attn.py diff --git a/vllm/v1/attention/backends/mamba_selectors.py b/vllm/v1/attention/backends/mamba_selectors.py index 852e0dfe1b312..d3a0c63c5e964 100644 --- a/vllm/v1/attention/backends/mamba_selectors.py +++ b/vllm/v1/attention/backends/mamba_selectors.py @@ -3,7 +3,7 @@ from vllm.attention.backends.abstract import AttentionBackend from vllm.v1.attention.backends.linear_attn import LinearAttentionBackend from vllm.v1.attention.backends.mamba1_attn import Mamba1AttentionBackend -from vllm.v1.attention.backends.mamba_attn import Mamba2AttentionBackend +from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionBackend def get_mamba_attn_backend(mamba_type: str) -> type[AttentionBackend]: