[ROCm][Bugfix] Add MLACommonMetadata to allowed attention types for speculative decoding (#30430)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
2026-06-07 01:42:17 +08:00 · 2025-12-11 13:25:01 -06:00 · 2025-12-11 13:25:01 -06:00 · 72aaac5b66
commit 72aaac5b66
parent 0e71eaa644
1 changed file with 6 additions and 0 deletions
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -178,6 +178,12 @@ class EagleProposer:
                )
                rocm_types.append(AiterFlashAttentionMetadata)
            # TRITON_MLA backend support for MLA models (e.g., DeepSeek)
            from vllm.v1.attention.backends.mla.common import MLACommonMetadata
            rocm_types.append(MLACommonMetadata)
            self.allowed_attn_types = tuple(rocm_types)
        # Parse the speculative token tree.