From 72aaac5b66f908008efed5ba6874c3ed60e6c90a Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Thu, 11 Dec 2025 13:25:01 -0600 Subject: [PATCH] [ROCm][Bugfix] Add MLACommonMetadata to allowed attention types for speculative decoding (#30430) Signed-off-by: Andreas Karatzas --- vllm/v1/spec_decode/eagle.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 4cc78ae9d23ae..65a0a88ec0f5d 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -178,6 +178,12 @@ class EagleProposer: ) rocm_types.append(AiterFlashAttentionMetadata) + + # TRITON_MLA backend support for MLA models (e.g., DeepSeek) + from vllm.v1.attention.backends.mla.common import MLACommonMetadata + + rocm_types.append(MLACommonMetadata) + self.allowed_attn_types = tuple(rocm_types) # Parse the speculative token tree.