Merge a2d5ef088a55428a853c3204b2cc757def8ae9d0 into 3125d7995020f4ae1b7777e41af6c3a6e714cb1c

2025-12-09 00:15:24 +08:00 · 2025-10-17 19:05:21 +00:00 · 2025-10-17 19:05:21 +00:00 · ba77a1ab74
commit ba77a1ab74
parent 3125d79950 a2d5ef088a
1 changed files with 13 additions and 0 deletions
--- a/vllm/v1/attention/backends/mla/common.py
+++ b/vllm/v1/attention/backends/mla/common.py
@@ -558,6 +558,19 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
            self.dcp_world_size = 1
            self.dcp_rank = 0

+        if (
+            self.dcp_world_size > 1
+            and self.__class__.reorder_batch_threshold > 1
+            and self.__class__.__name__ != "FlashAttnMLAMetadataBuilder"
+        ):
+            logger.warning_once(
+                "DCP is enabled but not FlashAttnMLA is used. "
+                "Set query_len_support back to SINGLE_ONLY "
+                "and reorder_batch_threshold back to 1."
+            )
+            self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
+            self.__class__.reorder_batch_threshold = 1
+
        # Don't try to access the runner on AMD
        if self.aot_schedule:
            self.page_size = self.kv_cache_spec.block_size