[Bugfix] change FlashMLA reorder_batch_threshold (#27777)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
2026-07-10 08:47:09 +08:00 · 2025-11-03 15:17:10 -05:00 · 2025-11-03 15:17:10 -05:00 · 145c00a4d3
commit 145c00a4d3
parent 55011aef24
1 changed file with 1 addition and 1 deletion
--- a/vllm/v1/attention/backends/mla/flashmla.py
+++ b/vllm/v1/attention/backends/mla/flashmla.py
@@ -71,7 +71,7 @@ class FlashMLAMetadata(MLACommonMetadata[FlashMLADecodeMetadata]):
 class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]):
    cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.UNIFORM_BATCH
    query_len_support: ClassVar[QueryLenSupport] = QueryLenSupport.UNIFORM
-    reorder_batch_threshold: int = 512  # process small prefills with decode pathway
+    reorder_batch_threshold: int = 128  # process small prefills with decode pathway
    # ^ TODO(matt): tune this
    def __init__(