[ROCm][Bugfix] Add MLACommonMetadata to allowed attention types for speculative decoding (#30430)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
Andreas Karatzas 2025-12-11 13:25:01 -06:00 committed by GitHub
parent 0e71eaa644
commit 72aaac5b66


@@ -178,6 +178,12 @@ class EagleProposer:
                 )
                 rocm_types.append(AiterFlashAttentionMetadata)
+            # TRITON_MLA backend support for MLA models (e.g., DeepSeek)
+            from vllm.v1.attention.backends.mla.common import MLACommonMetadata
+            rocm_types.append(MLACommonMetadata)
             self.allowed_attn_types = tuple(rocm_types)
         # Parse the speculative token tree.
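
To make the intent of the change concrete, below is a minimal, self-contained sketch (not vLLM source) of how a tuple of allowed attention metadata types can gate the speculative-decoding path. Only the names MLACommonMetadata, AiterFlashAttentionMetadata, rocm_types, and allowed_attn_types come from the diff above; the Proposer class, its propose() method, and the error message are hypothetical stand-ins for illustration.

    # Minimal sketch, not vLLM source. Only MLACommonMetadata,
    # AiterFlashAttentionMetadata, rocm_types and allowed_attn_types mirror
    # the diff above; everything else is a hypothetical illustration.

    class AiterFlashAttentionMetadata:
        """Stand-in for the ROCm AITER flash-attention metadata type."""

    class MLACommonMetadata:
        """Stand-in for vllm.v1.attention.backends.mla.common.MLACommonMetadata."""

    class Proposer:
        def __init__(self) -> None:
            rocm_types = [AiterFlashAttentionMetadata]
            # The fix: also accept MLA metadata so MLA models (e.g., DeepSeek)
            # pass the isinstance() check below on ROCm instead of being rejected.
            rocm_types.append(MLACommonMetadata)
            self.allowed_attn_types = tuple(rocm_types)

        def propose(self, attn_metadata) -> None:
            # Reject attention backends this speculative-decoding path cannot handle.
            if not isinstance(attn_metadata, self.allowed_attn_types):
                raise ValueError(
                    "Unsupported attention metadata for speculative decoding: "
                    f"{type(attn_metadata).__name__}"
                )
            # ... draft-token proposal logic would follow here ...

    # Without MLACommonMetadata in the allowed tuple, this call would raise;
    # with the addition above it passes the type check.
    Proposer().propose(MLACommonMetadata())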