[ROCm][Bugfix] Add MLACommonMetadata to allowed attention types for speculative decoding (#30430)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
2026-03-16 13:57:12 +08:00 · 2025-12-11 13:25:01 -06:00 · 2025-12-11 13:25:01 -06:00 · 72aaac5b66
commit 72aaac5b66
parent 0e71eaa644
1 changed file with 6 additions and 0 deletions
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -178,6 +178,12 @@ class EagleProposer:
                )

                rocm_types.append(AiterFlashAttentionMetadata)
+
+            # TRITON_MLA backend support for MLA models (e.g., DeepSeek)
+            from vllm.v1.attention.backends.mla.common import MLACommonMetadata
+
+            rocm_types.append(MLACommonMetadata)
+
            self.allowed_attn_types = tuple(rocm_types)

        # Parse the speculative token tree.