Merge a2d5ef088a55428a853c3204b2cc757def8ae9d0 into 3125d7995020f4ae1b7777e41af6c3a6e714cb1c

This commit is contained in:
Jaya Yuan 2025-10-17 19:05:21 +00:00 committed by GitHub
commit ba77a1ab74
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -558,6 +558,19 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
self.dcp_world_size = 1
self.dcp_rank = 0
if (
self.dcp_world_size > 1
and self.__class__.reorder_batch_threshold > 1
and self.__class__.__name__ != "FlashAttnMLAMetadataBuilder"
):
logger.warning_once(
"DCP is enabled but not FlashAttnMLA is used. "
"Set query_len_support back to SINGLE_ONLY "
"and reorder_batch_threshold back to 1."
)
self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
self.__class__.reorder_batch_threshold = 1
# Don't try to access the runner on AMD
if self.aot_schedule:
self.page_size = self.kv_cache_spec.block_size