mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-24 17:06:37 +08:00
[ROCm][Bugfix] Add MLACommonMetadata to allowed attention types for speculative decoding (#30430)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
This commit is contained in:
parent
0e71eaa644
commit
72aaac5b66
@@ -178,6 +178,12 @@ class EagleProposer:
|
||||
)
|
||||
|
||||
rocm_types.append(AiterFlashAttentionMetadata)
|
||||
|
||||
# TRITON_MLA backend support for MLA models (e.g., DeepSeek)
|
||||
from vllm.v1.attention.backends.mla.common import MLACommonMetadata
|
||||
|
||||
rocm_types.append(MLACommonMetadata)
|
||||
|
||||
self.allowed_attn_types = tuple(rocm_types)
|
||||
|
||||
# Parse the speculative token tree.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user