From 145c00a4d32b7a681f7fb936c9575812c7aa7880 Mon Sep 17 00:00:00 2001
From: Matthew Bonanni
Date: Mon, 3 Nov 2025 15:17:10 -0500
Subject: [PATCH] [Bugfix] change FlashMLA reorder_batch_threshold (#27777)

Signed-off-by: Matthew Bonanni
---
 vllm/v1/attention/backends/mla/flashmla.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/v1/attention/backends/mla/flashmla.py b/vllm/v1/attention/backends/mla/flashmla.py
index 1f98204031ed5..bc17307532093 100644
--- a/vllm/v1/attention/backends/mla/flashmla.py
+++ b/vllm/v1/attention/backends/mla/flashmla.py
@@ -71,7 +71,7 @@ class FlashMLAMetadata(MLACommonMetadata[FlashMLADecodeMetadata]):
 class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
     cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
     query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.UNIFORM
-    reorder_batch_threshold: int = 512  # process small prefills with decode pathway
+    reorder_batch_threshold: int = 128  # process small prefills with decode pathway
     # ^ TODO(matt): tune this
 
     def __init__(
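
For context, `reorder_batch_threshold` controls which requests the MLA backend routes
through its decode pathway: requests whose query length is at or below the threshold
are grouped with decodes, while longer prefills take the prefill pathway (the inline
comment "process small prefills with decode pathway" describes exactly this). The
sketch below is a minimal illustration of that split under those assumptions, not
vLLM's actual implementation; the helper `split_by_query_len` and the standalone
constant are hypothetical names introduced here for illustration.

    # Minimal sketch (hypothetical, not vLLM's code): how a reorder-batch
    # threshold partitions a mixed batch into decode-pathway and
    # prefill-pathway requests.

    REORDER_BATCH_THRESHOLD = 128  # new FlashMLA default (was 512)


    def split_by_query_len(query_lens: list[int]) -> tuple[list[int], list[int]]:
        """Return (decode_indices, prefill_indices) for one batch."""
        decode_idx: list[int] = []
        prefill_idx: list[int] = []
        for i, qlen in enumerate(query_lens):
            if qlen <= REORDER_BATCH_THRESHOLD:
                # Small queries (true decodes and short prefills) share the
                # decode kernel path.
                decode_idx.append(i)
            else:
                prefill_idx.append(i)
        return decode_idx, prefill_idx


    if __name__ == "__main__":
        # Two decodes, one 300-token prefill, one 4096-token prefill.
        print(split_by_query_len([1, 1, 300, 4096]))
        # -> ([0, 1], [2, 3])
        # With the old threshold of 512 the 300-token prefill would have
        # taken the decode pathway; at 128 it moves to the prefill pathway.

The practical effect of lowering the default from 512 to 128 is that only genuinely
small prefills ride the decode kernel, keeping it within its efficient regime; the
`TODO(matt): tune this` in the patch signals that 128 is an interim value.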