Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-05-10 07:40:13 +08:00)
[Attention] Register FLASHMLA_SPARSE (#26441)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
parent
da364615fc
commit
2a03f93de9
@@ -21,6 +21,7 @@ class _Backend(enum.Enum):
|
|||||||
TRITON_MLA = enum.auto()
|
TRITON_MLA = enum.auto()
|
||||||
CUTLASS_MLA = enum.auto()
|
CUTLASS_MLA = enum.auto()
|
||||||
FLASHMLA = enum.auto()
|
FLASHMLA = enum.auto()
|
||||||
|
FLASHMLA_SPARSE = enum.auto()
|
||||||
FLASH_ATTN_MLA = enum.auto()
|
FLASH_ATTN_MLA = enum.auto()
|
||||||
PALLAS = enum.auto()
|
PALLAS = enum.auto()
|
||||||
IPEX = enum.auto()
|
IPEX = enum.auto()
|
||||||
@@ -43,6 +44,7 @@ BACKEND_MAP = {
|
|||||||
_Backend.TRITON_MLA: "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend", # noqa: E501
|
_Backend.TRITON_MLA: "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend", # noqa: E501
|
||||||
_Backend.CUTLASS_MLA: "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend", # noqa: E501
|
_Backend.CUTLASS_MLA: "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend", # noqa: E501
|
||||||
_Backend.FLASHMLA: "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend", # noqa: E501
|
_Backend.FLASHMLA: "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend", # noqa: E501
|
||||||
|
_Backend.FLASHMLA_SPARSE: "vllm.v1.attention.backends.mla.flashmla_sparse.FlashMLASparseBackend", # noqa: E501
|
||||||
_Backend.FLASH_ATTN_MLA: "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend", # noqa: E501
|
_Backend.FLASH_ATTN_MLA: "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend", # noqa: E501
|
||||||
_Backend.PALLAS: "vllm.v1.attention.backends.pallas.PallasAttentionBackend", # noqa: E501
|
_Backend.PALLAS: "vllm.v1.attention.backends.pallas.PallasAttentionBackend", # noqa: E501
|
||||||
_Backend.FLEX_ATTENTION: "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend", # noqa: E501
|
_Backend.FLEX_ATTENTION: "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend", # noqa: E501
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ class FlashMLASparseBackend(AttentionBackend):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_name() -> str:
|
def get_name() -> str:
|
||||||
return "FLASHMLA_SPARSE_VLLM_V1"
|
return "FLASHMLA_SPARSE"
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_metadata_cls() -> type[AttentionMetadata]:
|
def get_metadata_cls() -> type[AttentionMetadata]:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user