From 72aaac5b66f908008efed5ba6874c3ed60e6c90a Mon Sep 17 00:00:00 2001
From: Andreas Karatzas <akaratza@amd.com>
Date: Thu, 11 Dec 2025 13:25:01 -0600
Subject: [PATCH] [ROCm][Bugfix] Add MLACommonMetadata to allowed attention
 types for speculative decoding (#30430)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
---
 vllm/v1/spec_decode/eagle.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index 4cc78ae9d23ae..65a0a88ec0f5d 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -178,6 +178,12 @@ class EagleProposer:
                 )
 
                 rocm_types.append(AiterFlashAttentionMetadata)
+
+            # Accept MLA metadata: TRITON_MLA backend for MLA models (e.g., DeepSeek)
+            from vllm.v1.attention.backends.mla.common import MLACommonMetadata
+
+            rocm_types.append(MLACommonMetadata)
+
             self.allowed_attn_types = tuple(rocm_types)
 
         # Parse the speculative token tree.