mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-07 01:42:17 +08:00
[ROCm][Bugfix] Add MLACommonMetadata to allowed attention types for speculative decoding (#30430)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
parent
0e71eaa644
commit
72aaac5b66
@@ -178,6 +178,12 @@ class EagleProposer:
|
|||||||
)
|
)
|
||||||
|
|
||||||
rocm_types.append(AiterFlashAttentionMetadata)
|
rocm_types.append(AiterFlashAttentionMetadata)
|
||||||
|
|
||||||
|
# TRITON_MLA backend support for MLA models (e.g., DeepSeek)
|
||||||
|
from vllm.v1.attention.backends.mla.common import MLACommonMetadata
|
||||||
|
|
||||||
|
rocm_types.append(MLACommonMetadata)
|
||||||
|
|
||||||
self.allowed_attn_types = tuple(rocm_types)
|
self.allowed_attn_types = tuple(rocm_types)
|
||||||
|
|
||||||
# Parse the speculative token tree.
|
# Parse the speculative token tree.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user