mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-25 18:24:33 +08:00
bugfix: set reorder_batch_threshold back to 1 when using FlashMLA with DCP enabled
Signed-off-by: FENP <32334296+FENP@users.noreply.github.com>
This commit is contained in:
parent
d2740fafbf
commit
a2d5ef088a
@ -558,6 +558,19 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]):
|
||||
self.dcp_world_size = 1
|
||||
self.dcp_rank = 0
|
||||
|
||||
if (
|
||||
self.dcp_world_size > 1
|
||||
and self.__class__.reorder_batch_threshold > 1
|
||||
and self.__class__.__name__ != "FlashAttnMLAMetadataBuilder"
|
||||
):
|
||||
logger.warning_once(
|
||||
"DCP is enabled but not FlashAttnMLA is used. "
|
||||
"Set query_len_support back to SINGLE_ONLY "
|
||||
"and reorder_batch_threshold back to 1."
|
||||
)
|
||||
self.__class__.query_len_support = QueryLenSupport.SINGLE_ONLY
|
||||
self.__class__.reorder_batch_threshold = 1
|
||||
|
||||
# Don't try to access the runner on AMD
|
||||
if self.aot_schedule:
|
||||
self.page_size = self.kv_cache_spec.block_size
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user